From 26e0658a7e068b4d0a227ccac137629285fceeba Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 15 Nov 2023 16:07:23 -0800 Subject: [PATCH 01/22] adding objects for TestGroup and IntegrationTest; adapting testing logic to shared attack data between 1+ tests; many flake8 fixes --- .../DetectionTestingInfrastructure.py | 503 +++++++++++++----- contentctl/contentctl.py | 241 +++++---- .../detection_abstract.py | 105 ++-- contentctl/objects/base_test.py | 34 ++ contentctl/objects/base_test_result.py | 14 + contentctl/objects/detection.py | 54 +- contentctl/objects/enums.py | 20 +- contentctl/objects/integration_test.py | 32 ++ contentctl/objects/integration_test_result.py | 5 + contentctl/objects/test_config.py | 7 +- contentctl/objects/test_group.py | 37 ++ contentctl/objects/unit_test.py | 31 +- contentctl/objects/unit_test_result.py | 18 +- 13 files changed, 757 insertions(+), 344 deletions(-) create mode 100644 contentctl/objects/base_test.py create mode 100644 contentctl/objects/base_test_result.py create mode 100644 contentctl/objects/integration_test.py create mode 100644 contentctl/objects/integration_test_result.py create mode 100644 contentctl/objects/test_group.py diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 9a895edc..1a397e0d 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -1,51 +1,101 @@ -from pydantic import BaseModel, PrivateAttr, Field -from dataclasses import dataclass +import time +import uuid import abc +import os.path +import configparser +import json +import datetime +import tqdm +import pathlib +from tempfile import TemporaryDirectory, mktemp +from ssl import SSLEOFError, SSLZeroReturnError +from sys import stdout +from dataclasses import dataclass +from shutil import copyfile +from typing import Union +from enum import Enum + +from pydantic import BaseModel, PrivateAttr, Field import requests import splunklib.client as client +from splunklib.binding import HTTPError +from splunklib.results import JSONResultsReader, Message +import splunklib.results +from urllib3 import disable_warnings +import urllib.parse + from contentctl.objects.enums import PostTestBehavior, DetectionStatus from contentctl.objects.detection import Detection -from contentctl.objects.unit_test_test import UnitTestTest +from contentctl.objects.unit_test import UnitTest from contentctl.objects.unit_test_attack_data import UnitTestAttackData from contentctl.objects.unit_test_result import UnitTestResult from contentctl.objects.test_config import TestConfig, Infrastructure -from shutil import copyfile -from splunklib.binding import HTTPError -from splunklib.results import JSONResultsReader, Message -import os.path -import configparser -from ssl import SSLEOFError, SSLZeroReturnError -import time -import uuid -from sys import stdout - -from tempfile import TemporaryDirectory, mktemp -import pathlib +from contentctl.objects.test_group import TestGroup from contentctl.helper.utils import Utils from contentctl.actions.detection_testing.DataManipulation import ( DataManipulation, ) -import splunklib.results -from urllib3 import disable_warnings -import urllib.parse -import json -from typing import Union -import datetime -import tqdm +class TestReportingType(str, Enum): + """ + 5-char identifiers for the type of 
testing being reported on + """ + # Reporting around general testing setup (e.g. infra, role configuration) + SETUP = "setup" + + # Reporting around a group of tests + GROUP = "group" + + # Reporting around a unit test + UNIT = "unit " + + # Reporting around an integration test + INTEGRATION = "integ" + + +class TestingStates(str, Enum): + """ + Defined testing states + """ + BEGINNING_GROUP = "Beginning Test Group" + BEGINNING_TEST = "Beginning Test" + DOWNLOADING = "Downloading Data" + REPLAYING = "Replaying Data" + PROCESSING = "Waiting for Processing" + SEARCHING = "Running Search" + DELETING = "Deleting Data" + + +# the longest length of any state +LONGEST_STATE = max(len(w.value) for w in TestingStates) + +class FinalTestingStates(str, Enum): + """ + The possible final states for a test (for pbar reporting) + """ + FAIL = "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m" + PASS = "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m" + + +# max length of a test name MAX_TEST_NAME_LENGTH = 70 -TESTING_STATES = [ - "Downloading Data", - "Replaying Data", - "Waiting for Processing", - "Running Search", - "Deleting Data", -] -LONGEST_STATE = max(len(w) for w in TESTING_STATES) -PBAR_FORMAT_STRING = "{test_name} >> {state} | Time: {time}" +# The format string used for pbar reporting +PBAR_FORMAT_STRING = "({test_reporting_type}) {test_name} >> {state} | Time: {time}" + + +class SetupTestGroupResults(BaseModel): + exception: Union[Exception, None] = None + success: bool = True + duration: float = 0 + start_time: float = 0 + + +class CleanupTestGroupResults(BaseModel): + duration: float = 0 + start_time: float = 0 class ContainerStoppedException(Exception): @@ -105,14 +155,14 @@ def setup(self): mininterval=0, file=stdout ) - + self.start_time = time.time() try: for func, msg in [ (self.start, "Starting"), (self.get_conn, "Waiting for App Installation"), (self.configure_conf_file_datamodels, "Configuring Datamodels"), - (self.create_replay_index,f"Create index '{self.sync_obj.replay_index}'"), + (self.create_replay_index, f"Create index '{self.sync_obj.replay_index}'"), (self.configure_imported_roles, "Configuring Roles"), (self.configure_delete_indexes, "Configuring Indexes"), (self.configure_hec, "Configuring HEC"), @@ -120,7 +170,11 @@ def setup(self): ]: self.format_pbar_string( - self.get_name(), msg, self.start_time, update_sync_status=True + TestReportingType.SETUP, + self.get_name(), + msg, + self.start_time, + update_sync_status=True, ) func() self.check_for_teardown() @@ -130,7 +184,7 @@ def setup(self): self.finish() return - self.format_pbar_string(self.get_name(), "Finished Setup!", self.start_time) + self.format_pbar_string(TestReportingType.SETUP, self.get_name(), "Finished Setup!", self.start_time) def wait_for_ui_ready(self): self.get_conn() @@ -143,7 +197,7 @@ def configure_hec(self): ) self.hec_token = str(res.token) return - except Exception as e: + except Exception: # HEC input does not exist. That's okay, we will create it pass @@ -170,7 +224,7 @@ def get_conn(self) -> client.Service: # continue trying to re-establish a connection until after # the server has restarted self.connect_to_api() - except Exception as e: + except Exception: # there was some issue getting the connection. 
Try again just once self.connect_to_api() return self._conn @@ -196,6 +250,7 @@ def connect_to_api(self, sleep_seconds: int = 5): if conn.restart_required: self.format_pbar_string( + TestReportingType.SETUP, self.get_name(), "Waiting for reboot", self.start_time, @@ -208,9 +263,9 @@ def connect_to_api(self, sleep_seconds: int = 5): except ConnectionRefusedError as e: raise (e) - except SSLEOFError as e: + except SSLEOFError: pass - except SSLZeroReturnError as e: + except SSLZeroReturnError: pass except ConnectionResetError: pass @@ -225,6 +280,7 @@ def connect_to_api(self, sleep_seconds: int = 5): for _ in range(sleep_seconds): self.format_pbar_string( + TestReportingType.SETUP, self.get_name(), "Getting API Connection", self.start_time, @@ -247,7 +303,7 @@ def create_replay_index(self): def configure_imported_roles( self, imported_roles: list[str] = ["user", "power", "can_delete"], - enterprise_security_roles: list[str]= ["ess_admin", "ess_analyst", "ess_user"], + enterprise_security_roles: list[str] = ["ess_admin", "ess_analyst", "ess_user"], indexes: list[str] = ["_*", "*"], ): indexes.append(self.sync_obj.replay_index) @@ -264,7 +320,7 @@ def configure_imported_roles( self.pbar.write( f"Enterprise Security Roles do not exist:'{enterprise_security_roles}: {str(e)}" ) - + self.get_conn().roles.post( self.infrastructure.splunk_app_username, imported_roles=imported_roles, @@ -288,14 +344,17 @@ def wait_for_conf_file(self, app_name: str, conf_file_name: str): self.check_for_teardown() time.sleep(1) try: - res = self.get_conn().get( + _ = self.get_conn().get( f"configs/conf-{conf_file_name}", app=app_name ) return - except Exception as e: + except Exception: pass self.format_pbar_string( - self.get_name(), "Configuring Datamodels", self.start_time + TestReportingType.SETUP, + self.get_name(), + "Configuring Datamodels", + self.start_time, ) def configure_conf_file_datamodels(self, APP_NAME: str = "Splunk_SA_CIM"): @@ -323,7 +382,8 @@ def configure_conf_file_datamodels(self, APP_NAME: str = "Splunk_SA_CIM"): if not cim_acceleration_datamodels.is_file(): self.pbar.write( - f"******************************\nDATAMODEL ACCELERATION FILE {str(cim_acceleration_datamodels)} NOT FOUND. CIM DATAMODELS NOT ACCELERATED\n******************************\n" + f"******************************\nDATAMODEL ACCELERATION FILE {str(cim_acceleration_datamodels)} NOT " + "FOUND. 
CIM DATAMODELS NOT ACCELERATED\n******************************\n" ) else: parser.read(cim_acceleration_datamodels) @@ -334,7 +394,7 @@ def configure_conf_file_datamodels(self, APP_NAME: str = "Splunk_SA_CIM"): continue for name, value in parser[datamodel_name].items(): try: - res = self.get_conn().post( + _ = self.get_conn().post( f"properties/datamodels/{datamodel_name}/{name}", app=APP_NAME, value=value, @@ -349,7 +409,7 @@ def execute(self): while True: try: self.check_for_teardown() - except ContainerStoppedException as e: + except ContainerStoppedException: self.finish() return @@ -360,7 +420,7 @@ def execute(self): self.pbar.write(f"\nSkipping {detection.name} since it is status: {detection.status}\n") continue self.sync_obj.currentTestingQueue[self.get_name()] = detection - except IndexError as e: + except IndexError: # self.pbar.write( # f"No more detections to test, shutting down {self.get_name()}" # ) @@ -368,7 +428,7 @@ def execute(self): return try: self.test_detection(detection) - except ContainerStoppedException as e: + except ContainerStoppedException: self.pbar.write(f"Stopped container [{self.get_name()}]") self.finish() return @@ -383,24 +443,114 @@ def test_detection(self, detection: Detection): self.pbar.write(f"No test(s) found for {detection.name}") return - for test in detection.tests: - self.execute_test(detection, test) + # iterate TestGroups + for test_group in detection.test_groups: + # replay attack_data + setup_results = self.setup_test_group(test_group) + + # TODO: make it clear to Eric that test durations will no longer account for + # replay/cleanup in this new test grouping model (in the CLI reporting; the recorded + # test duration in the result file will include the setup/cleanup time in the duration + # for both unit and integration tests (effectively double counted)) + + # run unit test + self.execute_unit_test(detection, test_group.unit_test, setup_results) + + # run integration test (if enabled) + if self.global_config.enable_integration_testing: + self.execute_integration_test( + detection, + test_group.integration_test, + setup_results + ) + + # cleanup + cleanup_results = self.cleanup_test_group(test_group, setup_results.start_time) + + # update the results duration w/ the setup/cleanup time + if test_group.unit_test.result is not None: + test_group.unit_test.result.duration = round( + test_group.unit_test.result.duration + setup_results.duration + cleanup_results.duration, + 2 + ) + if test_group.integration_test.result is not None: + test_group.integration_test.result.duration = round( + test_group.integration_test.result.duration + setup_results.duration + cleanup_results.duration, + 2 + ) + + def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: + """ + Executes attack_data replay, captures test group start time, does some reporting to the CLI + and returns an object encapsulating the results of data replay + :param test_group: the TestGroup to replay for + :returns: SetupTestGroupResults + """ + # Capture the setup start time + setup_start_time = time.time() + self.pbar.reset() + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + TestingStates.BEGINNING_GROUP.value, + setup_start_time + ) + # https://github.com/WoLpH/python-progressbar/issues/164 + # Use NullBar if there is more than 1 container or we are running + # in a non-interactive context + + results = SetupTestGroupResults() + + try: + self.replay_attack_data_files(test_group, setup_start_time) + except Exception as e: + results.exception = e 
+ results.success = False + + results.duration = time.time() - setup_start_time + return results + + def cleanup_test_group( + self, + test_group: TestGroup, + test_group_start_time: float + ) -> CleanupTestGroupResults: + cleanup_start_time = time.time() + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + TestingStates.DELETING.value, + test_group_start_time, + ) + + # TODO: do we want to clean up even if replay failed? Could have been partial failure? + self.delete_attack_data(test_group.attack_data) + + return CleanupTestGroupResults( + duration=time.time() - cleanup_start_time, + start_time=cleanup_start_time + ) def format_pbar_string( self, + test_reporting_type: TestReportingType, test_name: str, state: str, start_time: Union[float, None], set_pbar: bool = True, update_sync_status: bool = False, ) -> str: - if start_time == None: + if start_time is None: start_time = self.start_time - field_one = test_name.ljust(MAX_TEST_NAME_LENGTH) - field_two = state.ljust(LONGEST_STATE) - field_three = datetime.timedelta(seconds=round(time.time() - start_time)) + field_one = test_reporting_type.value + field_two = test_name.ljust(MAX_TEST_NAME_LENGTH) + field_three = state.ljust(LONGEST_STATE) + field_four = datetime.timedelta(seconds=round(time.time() - start_time)) new_string = PBAR_FORMAT_STRING.format( - test_name=field_one, state=field_two, time=field_three + test_reporting_type=field_one, + test_name=field_two, + state=field_three, + time=field_four, ) if set_pbar: self.pbar.bar_format = new_string @@ -412,29 +562,50 @@ def format_pbar_string( } return new_string - def execute_test( - self, detection: Detection, test: UnitTestTest, FORCE_ALL_TIME: bool = True + def execute_unit_test( + self, + detection: Detection, + test: UnitTest, + setup_results: SetupTestGroupResults, + FORCE_ALL_TIME: bool = True ): - start_time = time.time() + """ + Execute a unit test and set its results appropriately + :param detection: the detection being tested + :param test: the specific test case (UnitTest) + :param setup_results: the results of test group setup + :param FORCE_ALL_TIME: boolean flag; if True, searches check data for all time; if False, + any earliest_time or latest_time configured in the test is respected + """ + # Capture unit test start time + test_start_time = time.time() + + # Reset the pbar and print that we are beginning a unit test self.pbar.reset() - self.format_pbar_string(f"{detection.name}:{test.name}", "Beginning Test", start_time) - # https://github.com/WoLpH/python-progressbar/issues/164 - # Use NullBar if there is more than 1 container or we are running - # in a non-interactive context - - try: - self.replay_attack_data_files(test.attack_data, test, start_time) - except Exception as e: + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + TestingStates.BEGINNING_TEST, + test_start_time, + ) + # if the replay failed, record the test failure and return + if not setup_results.success: test.result = UnitTestResult() test.result.set_job_content( - None, self.infrastructure, exception=e, duration=time.time() - start_time + None, + self.infrastructure, + exception=setup_results.exception, + duration=round(time.time() - test_start_time, 2) ) + + # report the failure to the CLI self.pbar.write( self.format_pbar_string( + TestReportingType.UNIT, f"{detection.name}:{test.name}", - "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m", - start_time=time.time() - start_time, + FinalTestingStates.FAIL.value, + 
start_time=test_start_time, set_pbar=False, ) ) @@ -443,33 +614,43 @@ def execute_test( # Set the mode and timeframe, if required kwargs = {"exec_mode": "blocking"} - for baseline in test.baselines: - self.retry_search_until_timeout(detection, test, kwargs, start_time) + # Iterate over baselines (if any) + for baseline in test.baselines: + # TODO: this is executing the test, not the baseline... + # TODO: should this be in a try/except if the later call is? + self.retry_search_until_timeout(detection, test, kwargs, test_start_time) + # Set earliest_time and latest_time appropriately if FORCE_ALL_TIME is False if not FORCE_ALL_TIME: if test.earliest_time is not None: kwargs.update({"earliest_time": test.earliest_time}) if test.latest_time is not None: kwargs.update({"latest_time": test.latest_time}) + # Run the detection's search query try: - self.retry_search_until_timeout(detection, test, kwargs, start_time) + self.retry_search_until_timeout(detection, test, kwargs, test_start_time) except ContainerStoppedException as e: - raise (e) + raise e except Exception as e: + # Init the test result and record a failure if there was an issue during the search test.result = UnitTestResult() test.result.set_job_content( - None, self.infrastructure, exception=e, duration=time.time() - start_time + None, self.infrastructure, exception=e, duration=time.time() - test_start_time ) + # Pause here if the terminate flag has NOT been set AND either of the below are true: + # 1. the behavior is always_pause + # 2. the behavior is pause_on_failure and the test failed if ( self.global_config.post_test_behavior == PostTestBehavior.always_pause or ( self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure - and (test.result is None or test.result.success == False) + and (test.result is None or test.result.success is False) ) ) and not self.sync_obj.terminate: + # Determine the state to report to the user if test.result is None: res = "ERROR" link = detection.search @@ -482,23 +663,25 @@ def execute_test( link = test.result.get_summary_dict()["sid_link"] self.format_pbar_string( - f"{detection.name}:{test.name}", f"{res} - {link} (CTRL+D to continue)", start_time + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + f"{res} - {link} (CTRL+D to continue)", + test_start_time, ) + # Wait for user input try: _ = input() - except Exception as e: + except Exception: pass - self.format_pbar_string(f"{detection.name}:{test.name}", f"Deleting Data", start_time) - self.delete_attack_data(test.attack_data) - if test.result is not None and test.result.success: self.pbar.write( self.format_pbar_string( + TestReportingType.UNIT, f"{detection.name}:{test.name}", - "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m", - start_time, + FinalTestingStates.PASS.value, + test_start_time, set_pbar=False, ) ) @@ -506,33 +689,50 @@ def execute_test( else: self.pbar.write( self.format_pbar_string( + TestReportingType.UNIT, f"{detection.name}:{test.name}", - "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m", - start_time, + FinalTestingStates.FAIL.value, + test_start_time, set_pbar=False, ) ) stdout.flush() if test.result is not None: - test.result.duration = round(time.time() - start_time, 2) + test.result.duration = round(time.time() - test_start_time, 2) + + def execute_integration_test(self, detection: Detection) -> None: + """ + Executes an integration test on the detection + :param detection: the detection on which to run the test + """ + raise NotImplementedError def retry_search_until_timeout( 
self, detection: Detection, - test: UnitTestTest, + test: UnitTest, kwargs: dict, start_time: float, ): + """ + Retries a search until the timeout is reached, setting test results appropriately + :param detection: the detection being tested + :param test: the UnitTest case being tested + :param kwargs: any additional keyword args to be passed with the search + :param start_time: the start time of the caller + """ + # Get the start time and compute the timeout search_start_time = time.time() search_stop_time = time.time() + self.sync_obj.timeout_seconds + # We will default to ensuring at least one result exists if test.pass_condition is None: - # we will default to ensuring at least one result exists search = detection.search else: + # Else, use the explicit pass condition search = f"{detection.search} {test.pass_condition}" - # Search that do not begin with '|' must begin with 'search ' + # Ensure searches that do not begin with '|' must begin with 'search ' if not search.strip().startswith("|"): if not search.strip().startswith("search "): search = f"search {search}" @@ -540,38 +740,56 @@ def retry_search_until_timeout( # exponential backoff for wait time tick = 2 + # Retry until timeout while time.time() < search_stop_time: + # This loop allows us to capture shutdown events without being + # stuck in an extended sleep. Remember that this raises an exception for _ in range(pow(2, tick - 1)): - # This loop allows us to capture shutdown events without being - # stuck in an extended sleep. Remember that this raises an exception self.check_for_teardown() - self.format_pbar_string(f"{detection.name}:{test.name}", "Waiting for Processing", start_time) + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + TestingStates.PROCESSING.value, + start_time + ) time.sleep(1) - self.format_pbar_string(f"{detection.name}:{test.name}", "Running Search", start_time) + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + TestingStates.SEARCHING.value, + start_time, + ) + # Execute the search and read the results job = self.get_conn().search(query=search, **kwargs) - results = JSONResultsReader(job.results(output_mode="json")) - + + # Consolidate a set of the distinct observable field names observable_fields_set = set([o.name for o in detection.tags.observable]) - + + # Ensure the search had at least one result if int(job.content.get("resultCount", "0")) > 0: + # Initialize the test result test.result = UnitTestResult() + + # Initialize the collection of fields that are empty that shouldn't be empty_fields = set() + + # Filter out any messages in the results for result in results: if isinstance(result, Message): continue - #otherwise it is a dict and we will process is + # If not a message, it is a dict and we will process it results_fields_set = set(result.keys()) - - missing_fields = observable_fields_set - results_fields_set - + # Identify any observable fields that are not available in the results + missing_fields = observable_fields_set - results_fields_set if len(missing_fields) > 0: + # Report a failure in such cases e = Exception(f"The observable field(s) {missing_fields} are missing in the detection results") test.result.set_job_content( job.content, @@ -580,37 +798,36 @@ def retry_search_until_timeout( success=False, duration=time.time() - search_start_time, ) - - + return - - - # If we find one or more fields that contain the string "null" then they were # not populated and we should throw an error. 
This can happen if there is a typo # on a field. In this case, the field will appear but will not contain any values current_empty_fields = set() for field in observable_fields_set: - if result.get(field,'null') == 'null': + if result.get(field, 'null') == 'null': current_empty_fields.add(field) - + # If everything succeeded up until now, and no empty fields are found in the + # current result, then the search was a success if len(current_empty_fields) == 0: test.result.set_job_content( - job.content, - self.infrastructure, - success=True, - duration=time.time() - search_start_time, + job.content, + self.infrastructure, + success=True, + duration=time.time() - search_start_time, ) return - + else: empty_fields = empty_fields.union(current_empty_fields) - - - e = Exception(f"One or more required observable fields {empty_fields} contained 'null' values. Is the data being " - "parsed correctly or is there an error in the naming of a field?") + + # Report a failure if there were empty fields in all results + e = Exception( + f"One or more required observable fields {empty_fields} contained 'null' values. Is the " + "data being parsed correctly or is there an error in the naming of a field?" + ) test.result.set_job_content( job.content, self.infrastructure, @@ -618,10 +835,11 @@ def retry_search_until_timeout( success=False, duration=time.time() - search_start_time, ) - + return - + else: + # Report a failure if there were no results at all test.result = UnitTestResult() test.result.set_job_content( job.content, @@ -630,9 +848,7 @@ def retry_search_until_timeout( duration=time.time() - search_start_time, ) tick += 1 - - - + return def delete_attack_data(self, attack_data_files: list[UnitTestAttackData]): @@ -645,7 +861,8 @@ def delete_attack_data(self, attack_data_files: list[UnitTestAttackData]): job = self.get_conn().jobs.create(splunk_search, **kwargs) results_stream = job.results(output_mode="json") - reader = splunklib.results.JSONResultsReader(results_stream) + # TODO: should we be doing something w/ this reader? 
+ _ = splunklib.results.JSONResultsReader(results_stream) except Exception as e: raise ( @@ -656,22 +873,21 @@ def delete_attack_data(self, attack_data_files: list[UnitTestAttackData]): def replay_attack_data_files( self, - attack_data_files: list[UnitTestAttackData], - test: UnitTestTest, - start_time: float, + test_group: TestGroup, + test_group_start_time: float, ): with TemporaryDirectory(prefix="contentctl_attack_data") as attack_data_dir: - for attack_data_file in attack_data_files: + for attack_data_file in test_group.attack_data: self.replay_attack_data_file( - attack_data_file, attack_data_dir, test, start_time + attack_data_file, attack_data_dir, test_group, test_group_start_time ) def replay_attack_data_file( self, attack_data_file: UnitTestAttackData, tmp_dir: str, - test: UnitTestTest, - start_time: float, + test_group: TestGroup, + test_group_start_time: float, ): tempfile = mktemp(dir=tmp_dir) @@ -680,20 +896,17 @@ def replay_attack_data_file( or attack_data_file.data.startswith("http://") ): if pathlib.Path(attack_data_file.data).is_file(): - self.format_pbar_string(test.name, "Copying Data", start_time) + self.format_pbar_string(TestReportingType.GROUP, test_group.name, "Copying Data", test_group_start_time) try: copyfile(attack_data_file.data, tempfile) except Exception as e: - raise ( - Exception( - f"Error copying local Attack Data File for [{Detection.name}] - [{attack_data_file.data}]: {str(e)}" - ) + raise Exception( + f"Error copying local Attack Data File for [{Detection.name}] - [{attack_data_file.data}]: " + f"{str(e)}" ) else: - raise ( - Exception( - f"Attack Data File for [{Detection.name}] is local [{attack_data_file.data}], but does not exist." - ) + raise Exception( + f"Attack Data File for [{Detection.name}] is local [{attack_data_file.data}], but does not exist." 
) else: @@ -703,7 +916,12 @@ def replay_attack_data_file( try: # In case the path is a local file, try to get it - self.format_pbar_string(test.name, "Downloading Data", start_time) + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + TestingStates.DOWNLOADING.value, + test_group_start_time + ) Utils.download_file_from_http( attack_data_file.data, tempfile, self.pbar, overwrite_file=True @@ -723,7 +941,12 @@ def replay_attack_data_file( ) # Upload the data - self.format_pbar_string(test.name, "Replaying Data", start_time) + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + TestingStates.REPLAYING.value, + test_group_start_time + ) self.hec_raw_replay(tempfile, attack_data_file) diff --git a/contentctl/contentctl.py b/contentctl/contentctl.py index 2cc402c6..fd95b51b 100644 --- a/contentctl/contentctl.py +++ b/contentctl/contentctl.py @@ -4,7 +4,6 @@ import tqdm import functools from typing import Union -import yaml import pathlib from contentctl.actions.detection_testing.GitHubService import ( @@ -28,7 +27,8 @@ SecurityContentProduct, DetectionTestingMode, PostTestBehavior, - DetectionTestingTargetInfrastructure + DetectionTestingTargetInfrastructure, + SigmaConverterTarget ) from contentctl.input.new_content_generator import NewContentGeneratorInputDto from contentctl.helper.config_handler import ConfigHandler @@ -36,15 +36,15 @@ from contentctl.objects.config import Config from contentctl.objects.app import App -from contentctl.objects.test_config import TestConfig, Infrastructure -from contentctl.actions.test import Test, TestInputDto, TestOutputDto -from contentctl.objects.enums import * -from contentctl.input.sigma_converter import * -from contentctl.actions.convert import * +from contentctl.objects.test_config import Infrastructure +from contentctl.actions.test import Test, TestInputDto +from contentctl.input.sigma_converter import SigmaConverterInputDto +from contentctl.actions.convert import ConvertInputDto, Convert SERVER_ARGS_ENV_VARIABLE = "CONTENTCTL_TEST_INFRASTRUCTURES" + def configure_unattended(args: argparse.Namespace) -> argparse.Namespace: # disable all calls to tqdm - this is so that CI/CD contexts don't # have a large amount of output due to progress bar updates. 
@@ -54,7 +54,9 @@ def configure_unattended(args: argparse.Namespace) -> argparse.Namespace: if args.unattended: if args.behavior != PostTestBehavior.never_pause.name: print( - f"For unattended mode, --behavior MUST be {PostTestBehavior.never_pause.name}.\nUpdating the behavior from '{args.behavior}' to '{PostTestBehavior.never_pause.name}'" + f"For unattended mode, --behavior MUST be {PostTestBehavior.never_pause.name}.\n" + f"Updating the behavior from '{args.behavior}' to " + f"'{PostTestBehavior.never_pause.name}'" ) args.behavior = PostTestBehavior.never_pause.name @@ -64,7 +66,7 @@ def configure_unattended(args: argparse.Namespace) -> argparse.Namespace: def print_ascii_art(): print( """ -Running Splunk Security Content Control Tool (contentctl) +Running Splunk Security Content Control Tool (contentctl) ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢶⠛⡇⠀⠀⠀⠀⠀⠀⣠⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ⠀⠀⠀⠀⠀⠀⠀⠀⣀⠼⠖⠛⠋⠉⠉⠓⠢⣴⡻⣾⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ @@ -92,21 +94,19 @@ def print_ascii_art(): ) -def start(args, read_test_file:bool = False) -> Config: +def start(args, read_test_file: bool = False) -> Config: base_config = ConfigHandler.read_config(pathlib.Path(args.path)/"contentctl.yml") if read_test_file: base_config.test = ConfigHandler.read_test_config(pathlib.Path(args.path)/"contentctl_test.yml", args.mode) return base_config - - def initialize(args) -> None: Initialize().execute(InitializeInputDto(path=pathlib.Path(args.path), demo=args.demo)) -def build(args, config:Union[Config,None]=None) -> DirectorOutputDto: - if config == None: +def build(args, config: Union[Config, None] = None) -> DirectorOutputDto: + if config is None: config = start(args) if args.type == "app": product_type = SecurityContentProduct.SPLUNK_APP @@ -115,14 +115,18 @@ def build(args, config:Union[Config,None]=None) -> DirectorOutputDto: elif args.type == "api": product_type = SecurityContentProduct.API else: - print(f"Invalid build type. Valid options app, ssa or api") + print("Invalid build type. Valid options app, ssa or api") sys.exit(1) director_input_dto = DirectorInputDto( - input_path=os.path.abspath(args.path), - product=product_type, + input_path=os.path.abspath(args.path), + product=product_type, config=config ) - generate_input_dto = GenerateInputDto(director_input_dto, args.appinspect_api_username, args.appinspect_api_password) + generate_input_dto = GenerateInputDto( + director_input_dto, + args.appinspect_api_username, + args.appinspect_api_password + ) generate = Generate() @@ -133,18 +137,20 @@ def api_deploy(args) -> None: config = start(args) deploy_input_dto = API_DeployInputDto(path=pathlib.Path(args.path), config=config) deploy = API_Deploy() - + deploy.execute(deploy_input_dto) + def acs_deploy(args) -> None: - config = start(args) + _ = start(args) raise NotImplementedError("ACS Deploy is not yet implemented.") + def test(args: argparse.Namespace): args = configure_unattended(args) config = start(args, read_test_file=True) - + if config.test is None: raise Exception("Error parsing test configuration. 
Test Object was None.") @@ -152,62 +158,74 @@ def test(args: argparse.Namespace): # yet exposed/written properly in # the config file if args.infrastructure is not None: - config.test.infrastructure_config.infrastructure_type = DetectionTestingTargetInfrastructure(args.infrastructure) + config.test.infrastructure_config.infrastructure_type = DetectionTestingTargetInfrastructure( + args.infrastructure + ) if args.mode is not None: - config.test.mode=DetectionTestingMode(args.mode) + config.test.mode = DetectionTestingMode(args.mode) if args.behavior is not None: - config.test.post_test_behavior=PostTestBehavior(args.behavior) + config.test.post_test_behavior = PostTestBehavior(args.behavior) if args.detections_list is not None: - config.test.detections_list=args.detections_list - + config.test.detections_list = args.detections_list + if args.enable_integration_testing or config.test.enable_integration_testing: + config.test.enable_integration_testing = True - + # validate and setup according to infrastructure type if config.test.infrastructure_config.infrastructure_type == DetectionTestingTargetInfrastructure.container: if args.num_containers is None: - raise Exception("Error - trying to start a test using container infrastructure but no value for --num_containers was found") - config.test.infrastructure_config.infrastructures = Infrastructure.get_infrastructure_containers(args.num_containers) + raise Exception( + "Error - trying to start a test using container infrastructure but no value for --num_containers was " + "found" + ) + config.test.infrastructure_config.infrastructures = Infrastructure.get_infrastructure_containers( + args.num_containers + ) elif config.test.infrastructure_config.infrastructure_type == DetectionTestingTargetInfrastructure.server: if args.server_info is None and os.environ.get(SERVER_ARGS_ENV_VARIABLE) is None: if len(config.test.infrastructure_config.infrastructures) == 0: - raise Exception("Error - trying to start a test using server infrastructure, but server information was not stored " - "in contentctl_test.yml or passed on the command line. Please see the documentation for --server_info " - "at the command line or 'infrastructures' in contentctl.yml.") + raise Exception( + "Error - trying to start a test using server infrastructure, but server information was not stored " + "in contentctl_test.yml or passed on the command line. Please see the documentation for " + "--server_info at the command line or 'infrastructures' in contentctl.yml." + ) else: print("Using server configuration from: [contentctl_test.yml infrastructures section]") - + else: if args.server_info is not None: print("Using server configuration from: [command line]") pass elif os.environ.get(SERVER_ARGS_ENV_VARIABLE) is not None: - args.server_info = os.environ.get(SERVER_ARGS_ENV_VARIABLE,"").split(';') + args.server_info = os.environ.get(SERVER_ARGS_ENV_VARIABLE, "").split(';') print(f"Using server configuration from: [{SERVER_ARGS_ENV_VARIABLE} environment variable]") else: - raise Exception(f"Server infrastructure information not passed in contentctl_test.yml file, using --server_info switch on the command line, or in the {SERVER_ARGS_ENV_VARIABLE} environment variable") + raise Exception( + "Server infrastructure information not passed in contentctl_test.yml file, using --server_info " + f"switch on the command line, or in the {SERVER_ARGS_ENV_VARIABLE} environment variable" + ) # if server info was provided on the command line, us that. 
Otherwise use the env - - - + config.test.infrastructure_config.infrastructures = [] - + for server in args.server_info: - address,username,password,web_ui_port,hec_port,api_port = server.split(",") - config.test.infrastructure_config.infrastructures.append(Infrastructure(splunk_app_username=username, - splunk_app_password=password, - instance_address=address, - hec_port=int(hec_port), - web_ui_port=int(web_ui_port), - api_port=int(api_port))) - + address, username, password, web_ui_port, hec_port, api_port = server.split(",") + config.test.infrastructure_config.infrastructures.append( + Infrastructure( + splunk_app_username=username, + splunk_app_password=password, + instance_address=address, + hec_port=int(hec_port), + web_ui_port=int(web_ui_port), + api_port=int(api_port) + ) + ) + # We do this before generating the app to save some time if options are incorrect. # For example, if the detection(s) we are trying to test do not exist githubService = GithubService(config.test) - director_output_dto = build(args, config) - - # All this information will later come from the config, so we will # be able to do it in Test().execute. For now, we will do it here app = App( @@ -229,16 +247,14 @@ def test(args: argparse.Namespace): config.test.apps = [app] + config.test.apps - test_input_dto = TestInputDto( director_output_dto=director_output_dto, githubService=githubService, config=config.test, ) - + test = Test() - result = test.execute(test_input_dto) # This return code is important. Even if testing # fully completes, if everything does not pass then @@ -248,7 +264,6 @@ def test(args: argparse.Namespace): else: sys.exit(1) - def validate(args) -> None: config = start(args) @@ -259,11 +274,11 @@ def validate(args) -> None: elif args.type == "api": product_type = SecurityContentProduct.API else: - print(f"Invalid build type. Valid options app, ssa or api") + print("Invalid build type. Valid options app, ssa or api") sys.exit(1) director_input_dto = DirectorInputDto( - input_path=pathlib.Path(args.path), - product=product_type, + input_path=pathlib.Path(args.path), + product=product_type, config=config ) validate_input_dto = ValidateInputDto(director_input_dto=director_input_dto) @@ -325,16 +340,16 @@ def convert(args) -> None: sys.exit(1) sigma_converter_input_dto = SigmaConverterInputDto( - data_model = data_model, - detection_path = args.detection_path, - detection_folder = args.detection_folder, - input_path = args.path, - log_source = args.log_source + data_model=data_model, + detection_path=args.detection_path, + detection_folder=args.detection_folder, + input_path=args.path, + log_source=args.log_source ) convert_input_dto = ConvertInputDto( - sigma_converter_input_dto = sigma_converter_input_dto, - output_path = os.path.abspath(args.output) + sigma_converter_input_dto=sigma_converter_input_dto, + output_path=os.path.abspath(args.output) ) convert = Convert() convert.execute(convert_input_dto) @@ -346,7 +361,7 @@ def main(): :param args: arguments passed by the user on command line while calling the script. :return: returns the output of the function called. """ - + # grab arguments parser = argparse.ArgumentParser( description="Use `contentctl action -h` to get help with any Splunk content action" @@ -381,6 +396,7 @@ def main(): reporting_parser = actions_parser.add_parser( "report", help="create Splunk content report of the current pack" ) + # TODO: is this action dead? 
inspect_parser = actions_parser.add_parser( "inspect", help="runs Splunk appinspect on a build Splunk app to ensure that an app meets Splunkbase requirements.", @@ -400,11 +416,16 @@ def main(): convert_parser = actions_parser.add_parser("convert", help="Convert a sigma detection to a Splunk ESCU detection.") init_parser.set_defaults(func=initialize) - init_parser.add_argument("--demo", action=argparse.BooleanOptionalAction, - help="Use this flag to pre-populate the content pack " - "with one additional detection that will fail 'contentctl validate' " - "and on detection that will fail 'contentctl test'. This is useful " - "for demonstrating contentctl functionality.") + init_parser.add_argument( + "--demo", + action=argparse.BooleanOptionalAction, + help=( + "Use this flag to pre-populate the content pack " + "with one additional detection that will fail 'contentctl validate' " + "and on detection that will fail 'contentctl test'. This is useful " + "for demonstrating contentctl functionality." + ) + ) validate_parser.add_argument( "-t", @@ -430,14 +451,20 @@ def main(): required=False, type=str, default=None, - help=f"Username for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + help=( + f"Username for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, " + "please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + ) ) build_parser.add_argument( "--appinspect_api_password", required=False, type=str, default=None, - help=f"Password for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + help=( + f"Password for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, " + "please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + ) ) build_parser.set_defaults(func=build) @@ -455,8 +482,6 @@ def main(): reporting_parser.set_defaults(func=reporting) - - api_deploy_parser.set_defaults(func=api_deploy) test_parser.add_argument( @@ -503,47 +528,78 @@ def main(): "of detections to test. Their paths should be relative to the app path.", ) - test_parser.add_argument("--unattended", action=argparse.BooleanOptionalAction) - - - test_parser.add_argument("--infrastructure", required=False, type=str, - choices=DetectionTestingTargetInfrastructure._member_names_, default=None, - help="Determines what infrastructure to use for testing. The options are " - "container and server. Container will set up Splunk Container(s) at runtime, " - "install all relevant apps, and perform configurations. Server will use " - "preconfigured server(s) either specified on the command line or in " - "contentctl_test.yml.") + + test_parser.add_argument( + "--infrastructure", + required=False, + type=str, + choices=DetectionTestingTargetInfrastructure._member_names_, + default=None, + help=( + "Determines what infrastructure to use for testing. The options are " + "container and server. Container will set up Splunk Container(s) at runtime, " + "install all relevant apps, and perform configurations. Server will use " + "preconfigured server(s) either specified on the command line or in " + "contentctl_test.yml." 
+ ) + ) test_parser.add_argument("--num_containers", required=False, default=1, type=int) test_parser.add_argument("--server_info", required=False, default=None, type=str, nargs='+') - #Even though these are also options to build, make them available to test_parser - #as well to make the tool easier to use + # Even though these are also options to build, make them available to test_parser + # as well to make the tool easier to use test_parser.add_argument( "--appinspect_api_username", required=False, type=str, default=None, - help=f"Username for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + help=( + f"Username for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, " + "please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + ) ) test_parser.add_argument( "--appinspect_api_password", required=False, type=str, default=None, - help=f"Password for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + help=( + f"Password for running AppInspect on {SecurityContentProduct.SPLUNK_APP.name} ONLY. For documentation, " + "please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" + ) + ) + # TODO: add this flag to the config? + test_parser.add_argument( + "--enable_integration_testing", + required=False, + action="store_true", + help="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk " + "instance with ES installed)" ) test_parser.set_defaults(func=test) - convert_parser.add_argument("-dm", "--data_model", required=False, type=str, default="cim", help="converter target, choose between cim, raw, ocsf") + convert_parser.add_argument( + "-dm", + "--data_model", + required=False, + type=str, + default="cim", + help="converter target, choose between cim, raw, ocsf" + ) convert_parser.add_argument("-lo", "--log_source", required=False, type=str, help="converter log source") convert_parser.add_argument("-dp", "--detection_path", required=False, type=str, help="path to a single detection") - convert_parser.add_argument("-df", "--detection_folder", required=False, type=str, help="path to a detection folder") + convert_parser.add_argument( + "-df", + "--detection_folder", + required=False, + type=str, + help="path to a detection folder" + ) convert_parser.add_argument("-o", "--output", required=True, type=str, help="output path to store the detections") convert_parser.set_defaults(func=convert) - # parse them args = parser.parse_args() @@ -554,6 +610,5 @@ def main(): print(f"Error during contentctl:\n{str(e)}") import traceback traceback.print_exc() - #traceback.print_stack() + # traceback.print_stack() sys.exit(1) - diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 57b20ef2..f9410008 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -1,17 +1,9 @@ from __future__ import annotations -import uuid -import string -import requests -import time -import sys import re import pathlib -from pydantic import BaseModel, validator, root_validator, Extra -from dataclasses import 
dataclass +from pydantic import validator, root_validator from typing import Union -from datetime import datetime, timedelta - from contentctl.objects.security_content_object import SecurityContentObject from contentctl.objects.enums import AnalyticsType @@ -29,19 +21,22 @@ class Detection_Abstract(SecurityContentObject): - #contentType: SecurityContentType = SecurityContentType.detections + # contentType: SecurityContentType = SecurityContentType.detections type: str file_path: str = None - #status field is REQUIRED (the way to denote this with pydantic is ...) + # status field is REQUIRED (the way to denote this with pydantic is ...) status: DetectionStatus = ... data_source: list[str] tags: DetectionTags search: Union[str, dict] how_to_implement: str known_false_positives: str - check_references: bool = False + check_references: bool = False references: list - + + # TODO: consider restructuring this as a set of associated tests; each test should be an individual case, but 2 + # tests may rely on the same data and thus it doesn't make sense to replay the data twice; similarly, it doesn't + # make sense to run an integration test if the unit test failed. Currently, IntegrationTest is a field in UnitTest tests: list[UnitTest] = [] # enrichments @@ -56,7 +51,7 @@ class Detection_Abstract(SecurityContentObject): lookups: list[Lookup] = [] cve_enrichment: list = None splunk_app_enrichment: list = None - + source: str = None nes_fields: str = None providing_technologies: list = None @@ -65,12 +60,14 @@ class Detection_Abstract(SecurityContentObject): class Config: use_enum_values = True - - def get_content_dependencies(self)->list[SecurityContentObject]: + def get_content_dependencies(self) -> list[SecurityContentObject]: return self.playbooks + self.baselines + self.macros + self.lookups - + @staticmethod - def get_detections_from_filenames(detection_filenames:set[str], all_detections:list[Detection_Abstract])->list[Detection_Abstract]: + def get_detections_from_filenames( + detection_filenames: set[str], + all_detections: list[Detection_Abstract] + ) -> list[Detection_Abstract]: detection_filenames = set(str(pathlib.Path(filename).absolute()) for filename in detection_filenames) detection_dict = SecurityContentObject.create_filename_to_content_dict(all_detections) @@ -78,7 +75,6 @@ def get_detections_from_filenames(detection_filenames:set[str], all_detections:l return [detection_dict[detection_filename] for detection_filename in detection_filenames] except Exception as e: raise Exception(f"Failed to find detection object for modified detection: {str(e)}") - @validator("type") def type_valid(cls, v, values): @@ -88,29 +84,34 @@ def type_valid(cls, v, values): @validator('how_to_implement', 'search', 'known_false_positives') def encode_error(cls, v, values, field): - if not isinstance(v,str): - if isinstance(v,dict) and field.name == "search": - #This is a special case of the search field. It can be a dict, containing - #a sigma search, if we are running the converter. So we will not - #validate the field further. Additional validation will be done - #during conversion phase later on + if not isinstance(v, str): + if isinstance(v, dict) and field.name == "search": + # This is a special case of the search field. It can be a dict, containing + # a sigma search, if we are running the converter. So we will not + # validate the field further. 
Additional validation will be done + # during conversion phase later on return v else: - #No other fields should contain a non-str type: - raise ValueError(f"Error validating field '{field.name}'. Field MUST be be a string, not type '{type(v)}' ") + # No other fields should contain a non-str type: + raise ValueError( + f"Error validating field '{field.name}'. Field MUST be be a string, not type '{type(v)}' " + ) - return SecurityContentObject.free_text_field_valid(cls,v,values,field) + return SecurityContentObject.free_text_field_valid(cls, v, values, field) @validator("status") def validation_for_ba_only(cls, v, values): # Ensure that only a BA detection can have status: validation p = pathlib.Path(values['file_path']) if v == DetectionStatus.validation.value: - if p.name.startswith("ssa___"): + if p.name.startswith("ssa___"): pass else: - raise ValueError(f"The following is NOT an ssa_ detection, but has 'status: {v}' which may ONLY be used for ssa_ detections: {values['file_path']}") - + raise ValueError( + f"The following is NOT an ssa_ detection, but has 'status: {v}' which may ONLY be used for " + f"ssa_ detections: {values['file_path']}" + ) + return v # @root_validator @@ -128,59 +129,65 @@ def validation_for_ba_only(cls, v, values): # def references_check(cls, v, values): # return LinkValidator.check_references(v, values["name"]) # return v - @validator("search") def search_obsersables_exist_validate(cls, v, values): if type(v) is str: - tags:DetectionTags = values.get("tags") - if tags == None: + tags: DetectionTags = values.get("tags") + if tags is None: raise ValueError("Unable to parse Detection Tags. Please resolve Detection Tags errors") - + observable_fields = [ob.name.lower() for ob in tags.observable] - - #All $field$ fields from the message must appear in the search + + # All $field$ fields from the message must appear in the search field_match_regex = r"\$([^\s.]*)\$" - - message_fields = [match.replace("$", "").lower() for match in re.findall(field_match_regex, tags.message.lower())] + + message_fields = [ + match.replace("$", "").lower() for match in re.findall(field_match_regex, tags.message.lower()) + ] missing_fields = set([field for field in observable_fields if field not in v.lower()]) error_messages = [] if len(missing_fields) > 0: - error_messages.append(f"The following fields are declared as observables, but do not exist in the search: {missing_fields}") + error_messages.append( + f"The following fields are declared as observables, but do not exist in the search: " + f"{missing_fields}" + ) - missing_fields = set([field for field in message_fields if field not in v.lower()]) if len(missing_fields) > 0: - error_messages.append(f"The following fields are used as fields in the message, but do not exist in the search: {missing_fields}") - + error_messages.append( + f"The following fields are used as fields in the message, but do not exist in the search: " + f"{missing_fields}" + ) + if len(error_messages) > 0 and values.get("status") == DetectionStatus.production.value: msg = "\n\t".join(error_messages) - raise(ValueError(msg)) - + raise (ValueError(msg)) + # Found everything return v @validator("tests") def tests_validate(cls, v, values): - if values.get("status","") == DetectionStatus.production.value and not v: + if values.get("status", "") == DetectionStatus.production.value and not v: raise ValueError( "At least one test is REQUIRED for production detection: " + values["name"] ) return v - + @validator("datamodel") def datamodel_valid(cls, v, values): for 
datamodel in v: if datamodel not in [el.name for el in DataModel]: raise ValueError("not valid data model: " + values["name"]) return v - + def all_tests_successful(self) -> bool: if len(self.tests) == 0: return False for test in self.tests: - if test.result is None or test.result.success == False: + if test.result is None or test.result.success is False: return False return True diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py new file mode 100644 index 00000000..c9221630 --- /dev/null +++ b/contentctl/objects/base_test.py @@ -0,0 +1,34 @@ +from enum import Enum +from typing import Union + +from pydantic import BaseModel + +from contentctl.objects.base_test_result import BaseTestResult + + +class TestType(str, Enum): + """ + Types of tests + """ + UNIT = "unit" + INTEGRATION = "integration" + + +class BaseTest(BaseModel): + """ + A test case for a detection + """ + # Test name + name: str + + # The test type + test_type: TestType + + # Search window start time + earliest_time: Union[str, None] = None + + # Search window end time + latest_time: Union[str, None] = None + + # The test result + result: Union[None, BaseTestResult] = None diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py new file mode 100644 index 00000000..88e5dd08 --- /dev/null +++ b/contentctl/objects/base_test_result.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel + +from typing import Union + + +class BaseTestResult(BaseModel): + message: Union[None, str] = None + exception: Union[Exception, None] = None + success: bool = False + duration: float = 0 + + class Config: + validate_assignment = True + arbitrary_types_allowed = True diff --git a/contentctl/objects/detection.py b/contentctl/objects/detection.py index 90501bda..332ceffe 100644 --- a/contentctl/objects/detection.py +++ b/contentctl/objects/detection.py @@ -1,28 +1,8 @@ -import uuid -import string -import requests -import time -import sys - -from pydantic import BaseModel, validator, root_validator, Extra -from dataclasses import dataclass from typing import Union -from datetime import datetime, timedelta - +from pydantic import validator from contentctl.objects.abstract_security_content_objects.detection_abstract import Detection_Abstract -from contentctl.objects.enums import AnalyticsType -from contentctl.objects.enums import DataModel -from contentctl.objects.enums import DetectionStatus -from contentctl.objects.detection_tags import DetectionTags -from contentctl.objects.config import ConfigDetectionConfiguration -from contentctl.objects.unit_test import UnitTest -from contentctl.objects.macro import Macro -from contentctl.objects.lookup import Lookup -from contentctl.objects.baseline import Baseline -from contentctl.objects.playbook import Playbook -from contentctl.helper.link_validator import LinkValidator -from contentctl.objects.enums import SecurityContentType +from contentctl.objects.test_group import TestGroup class Detection(Detection_Abstract): @@ -30,10 +10,30 @@ class Detection(Detection_Abstract): # You may add fields and/or validations # You may also experiment with removing fields - # and/or validations, or chagning validation(s). - # Please be aware that many defaults field(s) + # and/or validations, or chagning validation(s). + # Please be aware that many defaults field(s) # or validation(s) are required and removing or # them or modifying their behavior may cause - # undefined issues with the contentctl tooling - # or output of the tooling. 
- pass \ No newline at end of file + # undefined issues with the contentctl tooling + # or output of the tooling. + + # A list of groups of tests, relying on the same data + test_groups: Union[list[TestGroup], None] = None + + @validator("test_groups", always=True) + def validate_test_groups(cls, value, values) -> Union[list[TestGroup], None]: + """ + Validates the `test_groups` field and constructs the model from the list of unit tests + if no explicit construct was provided + :param value: the value of the field `test_groups` + :param values: a dict of the other fields in the Detection model + """ + # if the value was not the None default, do nothing + if value is not None: + return value + + # iterate over the unit tests and create a TestGroup (and as a result, an IntegrationTest) for each + test_groups: list[TestGroup] = [] + for unit_test in values["tests"]: + test_groups.append(TestGroup.derive_from_unit_test(unit_test, values["name"])) + return test_groups diff --git a/contentctl/objects/enums.py b/contentctl/objects/enums.py index b62ee474..b89dbdf1 100644 --- a/contentctl/objects/enums.py +++ b/contentctl/objects/enums.py @@ -48,24 +48,29 @@ class SecurityContentType(enum.Enum): # splunk_app = "splunk_app" # ba_objects = "ba_objects" # json_objects = "json_objects" + + class SecurityContentProduct(enum.Enum): SPLUNK_APP = 1 SSA = 2 API = 3 CUSTOM = 4 + class SigmaConverterTarget(enum.Enum): CIM = 1 RAW = 2 OCSF = 3 ALL = 4 + class DetectionStatus(enum.Enum): production = "production" deprecated = "deprecated" experimental = "experimental" validation = "validation" + class LogLevel(enum.Enum): NONE = "NONE" ERROR = "ERROR" @@ -77,7 +82,8 @@ class AlertActions(enum.Enum): rba = "rba" email = "email" -class StoryCategory(str,enum.Enum): + +class StoryCategory(str, enum.Enum): ABUSE = "Abuse" ADVERSARY_TACTICS = "Adversary Tactics" BEST_PRACTICES = "Best Practices" @@ -86,7 +92,6 @@ class StoryCategory(str,enum.Enum): MALWARE = "Malware" UNCATEGORIZED = "Uncategorized" VULNERABILITY = "Vulnerability" - # The following categories are currently used in # security_content stories but do not appear @@ -96,11 +101,10 @@ class StoryCategory(str,enum.Enum): ACCOUNT_COMPROMSE = "Account Compromise" DATA_DESTRUCTION = "Data Destruction" LATERAL_MOVEMENT = "Lateral Movement" - PRIVILEGE_ESCALATION = "Privilege Escalation" + PRIVILEGE_ESCALATION = "Privilege Escalation" RANSOMWARE = "Ransomware" UNAUTHORIZED_SOFTWARE = "Unauthorized Software" - - + class PostTestBehavior(str, enum.Enum): always_pause = "always_pause" @@ -125,9 +129,3 @@ class InstanceState(str, enum.Enum): error = "error" stopping = "stopping" stopped = "stopped" - -class SigmaConverterTarget(enum.Enum): - CIM = 1 - RAW = 2 - OCSF = 3 - ALL = 4 \ No newline at end of file diff --git a/contentctl/objects/integration_test.py b/contentctl/objects/integration_test.py new file mode 100644 index 00000000..34d1f634 --- /dev/null +++ b/contentctl/objects/integration_test.py @@ -0,0 +1,32 @@ +from typing import Union, Self + +from pydantic import Field + +from contentctl.objects.base_test import BaseTest, TestType +from contentctl.objects.unit_test import UnitTest +from contentctl.objects.integration_test_result import IntegrationTestResult + + +class IntegrationTest(BaseTest): + """ + An integration test for a detection against ES + """ + # The test type (integration) + test_type: TestType = Field(TestType.INTEGRATION, const=True) + + # The test result + result: Union[None, IntegrationTestResult] = None + + # TODO: how often do we 
actually encounter tests w/ an earliest/latest time defined? + @classmethod + def derive_from_unit_test(cls, unit_test: UnitTest) -> Self: + """ + Given a UnitTest, construct an IntegrationTest + :param unit_test: the UnitTest + :returns: IntegrationTest + """ + return cls( + name=unit_test.name, + earliest_time=unit_test.earliest_time, + latest_time=unit_test.latest_time, + ) diff --git a/contentctl/objects/integration_test_result.py b/contentctl/objects/integration_test_result.py new file mode 100644 index 00000000..f66a4740 --- /dev/null +++ b/contentctl/objects/integration_test_result.py @@ -0,0 +1,5 @@ +from contentctl.objects.base_test_result import BaseTestResult + + +class IntegrationTestResult(BaseTestResult): + pass diff --git a/contentctl/objects/test_config.py b/contentctl/objects/test_config.py index e3ef907a..40e1a6ad 100644 --- a/contentctl/objects/test_config.py +++ b/contentctl/objects/test_config.py @@ -7,7 +7,6 @@ import yaml import os from pydantic import BaseModel, validator, root_validator, Extra, Field -from dataclasses import dataclass from typing import Union import re import docker @@ -40,6 +39,7 @@ def getTestConfigFromYMLFile(path: pathlib.Path): except Exception as e: print(f"Error loading test configuration file '{path}': {str(e)}") + class Infrastructure(BaseModel, extra=Extra.forbid, validate_assignment=True): splunk_app_username: Union[str, None] = Field( default="admin", title="The name of the user for testing" @@ -486,6 +486,11 @@ class TestConfig(BaseModel, extra=Extra.forbid, validate_assignment=True): default=App.get_default_apps(), title="A list of all the apps to be installed on each container", ) + enable_integration_testing: bool = Field( + defauls=False, + title="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk" + " instance with ES installed)" + ) diff --git a/contentctl/objects/test_group.py b/contentctl/objects/test_group.py new file mode 100644 index 00000000..d8202a75 --- /dev/null +++ b/contentctl/objects/test_group.py @@ -0,0 +1,37 @@ +from typing import Self + +from pydantic import BaseModel + +from contentctl.objects.unit_test import UnitTest +from contentctl.objects.integration_test import IntegrationTest +from contentctl.objects.unit_test_attack_data import UnitTestAttackData + + +class TestGroup(BaseModel): + """ + Groups of different types of tests relying on the same attack data + """ + name: str + unit_test: UnitTest + integration_test: IntegrationTest + attack_data: list[UnitTestAttackData] + + # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? 
+ @classmethod + def derive_from_unit_test(cls, unit_test: UnitTest, name_prefix: str) -> Self: + """ + Given a UnitTest and a prefix, construct a TestGroup, with in IntegrationTest corresponding to the UnitTest + :param unit_test: the UnitTest + :param name_prefix: the prefix to be used for the TestGroup name (typically the Detection name) + :returns: TestGroup + """ + # construct the IntegrationTest + integration_test = IntegrationTest.derive_from_unit_test(unit_test) + + # contruct and return the TestGroup + return cls( + name=f"{name_prefix}:{unit_test.name}", + unit_test=unit_test, + integration_test=integration_test, + attack_data=unit_test.attack_data + ) diff --git a/contentctl/objects/unit_test.py b/contentctl/objects/unit_test.py index 5167480a..9f448203 100644 --- a/contentctl/objects/unit_test.py +++ b/contentctl/objects/unit_test.py @@ -1,23 +1,34 @@ from typing import Union -from pydantic import BaseModel, validator, ValidationError -from contentctl.objects.security_content_object import SecurityContentObject +from pydantic import Field + +# from contentctl.objects.security_content_object import SecurityContentObject +# from contentctl.objects.enums import SecurityContentType from contentctl.objects.unit_test_baseline import UnitTestBaseline from contentctl.objects.unit_test_attack_data import UnitTestAttackData from contentctl.objects.unit_test_result import UnitTestResult -from contentctl.objects.enums import SecurityContentType +from contentctl.objects.base_test import BaseTest, TestType + + +class UnitTest(BaseTest): + """ + A unit test for a detection + """ + # contentType: SecurityContentType = SecurityContentType.unit_tests + # The test type (unit) + test_type: TestType = Field(TestType.UNIT, const=True) -class UnitTest(BaseModel): - #contentType: SecurityContentType = SecurityContentType.unit_tests - name: str + # The condition to check if the searh was successful pass_condition: Union[str, None] = None - earliest_time: Union[str, None] = None - latest_time: Union[str, None] = None + + # Baselines to be run before a unit test baselines: list[UnitTestBaseline] = [] + + # The attack data to be ingested for the unit test attack_data: list[UnitTestAttackData] - result: Union[None, UnitTestResult] - \ No newline at end of file + # The result of the unit test + result: Union[None, UnitTestResult] = None diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py index b6054cac..e945ec0c 100644 --- a/contentctl/objects/unit_test_result.py +++ b/contentctl/objects/unit_test_result.py @@ -1,11 +1,10 @@ -from pydantic import BaseModel, root_validator, validator - - from typing import Union -from datetime import timedelta + from splunklib.data import Record + from contentctl.objects.test_config import Infrastructure from contentctl.helper.utils import Utils +from contentctl.objects.base_test_result import BaseTestResult FORCE_TEST_FAILURE_FOR_MISSING_OBSERVABLE = False @@ -13,18 +12,10 @@ SID_TEMPLATE = "{server}:{web_port}/en-US/app/search/search?sid={sid}" -class UnitTestResult(BaseModel): +class UnitTestResult(BaseTestResult): job_content: Union[Record, None] = None missing_observables: list[str] = [] sid_link: Union[None, str] = None - message: Union[None, str] = None - exception: Union[Exception,None] = None - success: bool = False - duration: float = 0 - - class Config: - validate_assignment = True - arbitrary_types_allowed = True def get_summary_dict( self, @@ -85,6 +76,7 @@ def set_job_content( sid=content.get("sid", None), ) + # TODO: 
this error message seems not the most helpful, since content must be None for it to be set elif content is None: self.job_content = None self.success = False From d91cd45a0d417501377c1d7e8401e15f1a5ad96d Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 15 Nov 2023 21:06:56 -0800 Subject: [PATCH 02/22] fixing config default --- contentctl/objects/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contentctl/objects/test_config.py b/contentctl/objects/test_config.py index 40e1a6ad..59896a30 100644 --- a/contentctl/objects/test_config.py +++ b/contentctl/objects/test_config.py @@ -487,7 +487,7 @@ class TestConfig(BaseModel, extra=Extra.forbid, validate_assignment=True): title="A list of all the apps to be installed on each container", ) enable_integration_testing: bool = Field( - defauls=False, + default=False, title="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk" " instance with ES installed)" ) From ccf2520de68723ae2f4d27d905af27dad97b6857 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 15 Nov 2023 21:19:00 -0800 Subject: [PATCH 03/22] todos, removed some default values, added TestGroup write, added some debugging statements --- .../DetectionTestingInfrastructure.py | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 1a397e0d..c09f8fbd 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -65,6 +65,7 @@ class TestingStates(str, Enum): PROCESSING = "Waiting for Processing" SEARCHING = "Running Search" DELETING = "Deleting Data" + DONE_GROUP = "Test Group Done" # the longest length of any state @@ -80,6 +81,8 @@ class FinalTestingStates(str, Enum): # max length of a test name +# TODO: this max size is declared, and it is used appropriatel w/ .ljust, but nothing truncates test names to makes +# them the appropriate size MAX_TEST_NAME_LENGTH = 70 # The format string used for pbar reporting @@ -90,12 +93,15 @@ class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None success: bool = True duration: float = 0 - start_time: float = 0 + start_time: float + + class Config: + arbitrary_types_allowed = True class CleanupTestGroupResults(BaseModel): - duration: float = 0 - start_time: float = 0 + duration: float + start_time: float class ContainerStoppedException(Exception): @@ -433,7 +439,7 @@ def execute(self): self.finish() return except Exception as e: - self.pbar.write(f"Error testing detection: {str(e)}") + self.pbar.write(f"Error testing detection: {type(e).__name__}: {str(e)}") finally: self.sync_obj.outputQueue.append(detection) self.sync_obj.currentTestingQueue[self.get_name()] = None @@ -478,6 +484,20 @@ def test_detection(self, detection: Detection): test_group.integration_test.result.duration + setup_results.duration + cleanup_results.duration, 2 ) + + self.pbar.write(f"duration: {test_group.unit_test.result.duration}") + self.pbar.write(f"setup_results.start_time: {setup_results.start_time}") + + # Write test group status + self.pbar.write( + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + TestingStates.DONE_GROUP.value, + setup_results.start_time, + set_pbar=False, + ) + ) def 
setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: """ @@ -488,6 +508,7 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: """ # Capture the setup start time setup_start_time = time.time() + self.pbar.write(f"setup_start_time: {setup_start_time}") self.pbar.reset() self.format_pbar_string( TestReportingType.GROUP, @@ -499,7 +520,7 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: # Use NullBar if there is more than 1 container or we are running # in a non-interactive context - results = SetupTestGroupResults() + results = SetupTestGroupResults(start_time=setup_start_time) try: self.replay_attack_data_files(test_group, setup_start_time) @@ -700,7 +721,13 @@ def execute_unit_test( if test.result is not None: test.result.duration = round(time.time() - test_start_time, 2) - def execute_integration_test(self, detection: Detection) -> None: + def execute_integration_test( + self, + detection: Detection, + test: UnitTest, + setup_results: SetupTestGroupResults, + FORCE_ALL_TIME: bool = True + ): """ Executes an integration test on the detection :param detection: the detection on which to run the test @@ -818,6 +845,7 @@ def retry_search_until_timeout( success=True, duration=time.time() - search_start_time, ) + self.pbar.write(f"runDuration: {test.result.job_content.runDuration}") return else: From 94fac391a1e9fbf1f76d8a0f781da0de5cc15fd7 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 15 Nov 2023 21:19:33 -0800 Subject: [PATCH 04/22] comments --- contentctl/objects/unit_test_result.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py index e945ec0c..0769bbfd 100644 --- a/contentctl/objects/unit_test_result.py +++ b/contentctl/objects/unit_test_result.py @@ -26,7 +26,7 @@ def get_summary_dict( for field in model_fields: if getattr(self, field) is not None: if isinstance(getattr(self, field), Exception): - #Exception cannot be serialized, so convert to str + # Exception cannot be serialized, so convert to str results_dict[field] = str(getattr(self, field)) else: results_dict[field] = getattr(self, field) @@ -53,10 +53,12 @@ def set_job_content( success: bool = False, duration: float = 0, ): + # Set duration, exception and success self.duration = round(duration, 2) self.exception = exception self.success = success + # Set the job content, if given if content is not None: self.job_content = content From 51816a3a8094159252abc4a9dd52ab02e49e506b Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Nov 2023 16:58:31 -0800 Subject: [PATCH 05/22] adding IntegrationTests to list of tests; adding logic for status; including test_type, duration and status in view output --- .../DetectionTestingManager.py | 4 +- .../DetectionTestingInfrastructure.py | 138 ++++++++++++++++-- .../views/DetectionTestingView.py | 51 +++++-- .../views/DetectionTestingViewWeb.py | 2 +- contentctl/contentctl.py | 3 +- contentctl/input/detection_builder.py | 15 ++ contentctl/input/director.py | 4 + .../detection_abstract.py | 32 +++- contentctl/objects/base_test.py | 10 +- contentctl/objects/base_test_result.py | 72 ++++++++- contentctl/objects/detection.py | 7 +- contentctl/objects/integration_test.py | 11 ++ contentctl/objects/unit_test.py | 13 +- contentctl/objects/unit_test_result.py | 46 ++---- 14 files changed, 332 insertions(+), 76 deletions(-) diff --git a/contentctl/actions/detection_testing/DetectionTestingManager.py 
b/contentctl/actions/detection_testing/DetectionTestingManager.py index 370d97a1..70f9f2e1 100644 --- a/contentctl/actions/detection_testing/DetectionTestingManager.py +++ b/contentctl/actions/detection_testing/DetectionTestingManager.py @@ -121,7 +121,7 @@ def sigint_handler(signum, frame): instance_pool.submit(instance.execute): instance for instance in self.detectionTestingInfrastructureObjects } - # What for execution to finish + # Wait for execution to finish for future in concurrent.futures.as_completed(future_instances_execute): try: result = future.result() @@ -131,6 +131,7 @@ def sigint_handler(signum, frame): self.output_dto.terminate = True + # Shut down all the views and wait for the shutdown to finish future_views_shutdowner = { view_shutdowner.submit(view.stop): view for view in self.input_dto.views } @@ -140,6 +141,7 @@ def sigint_handler(signum, frame): except Exception as e: print(f"Error stopping view: {str(e)}") + # Wait for original view-related threads to complete for future in concurrent.futures.as_completed(future_views): try: result = future.result() diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index c09f8fbd..0054a408 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -29,8 +29,11 @@ from contentctl.objects.unit_test import UnitTest from contentctl.objects.unit_test_attack_data import UnitTestAttackData from contentctl.objects.unit_test_result import UnitTestResult +from contentctl.objects.integration_test import IntegrationTest +from contentctl.objects.integration_test_result import IntegrationTestResult from contentctl.objects.test_config import TestConfig, Infrastructure from contentctl.objects.test_group import TestGroup +from contentctl.objects.base_test_result import TestResultStatus from contentctl.helper.utils import Utils from contentctl.actions.detection_testing.DataManipulation import ( DataManipulation, @@ -42,16 +45,16 @@ class TestReportingType(str, Enum): 5-char identifiers for the type of testing being reported on """ # Reporting around general testing setup (e.g. 
infra, role configuration) - SETUP = "setup" + SETUP = "SETUP" # Reporting around a group of tests - GROUP = "group" + GROUP = "GROUP" # Reporting around a unit test - UNIT = "unit " + UNIT = "UNIT " # Reporting around an integration test - INTEGRATION = "integ" + INTEGRATION = "INTEG" class TestingStates(str, Enum): @@ -78,6 +81,7 @@ class FinalTestingStates(str, Enum): """ FAIL = "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m" PASS = "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m" + SKIP = "\x1b[0;30;47m" + "SKIP".ljust(LONGEST_STATE) + "\x1b[0m" # max length of a test name @@ -86,7 +90,7 @@ class FinalTestingStates(str, Enum): MAX_TEST_NAME_LENGTH = 70 # The format string used for pbar reporting -PBAR_FORMAT_STRING = "({test_reporting_type}) {test_name} >> {state} | Time: {time}" +PBAR_FORMAT_STRING = "[{test_reporting_type}] {test_name} >> {state} | Time: {time}" class SetupTestGroupResults(BaseModel): @@ -422,6 +426,10 @@ def execute(self): try: detection = self.sync_obj.inputQueue.pop() if detection.status != DetectionStatus.production.value: + # NOTE: we handle skipping entire detections differently than we do skipping individual test cases; + # we skip entire detections by excluding them to an entirely separate queue, while we skip + # individual test cases via the BaseTest.skip() method, such as when we are skipping all + # integration tests (see DetectionBuilder.skipIntegrationTests) self.sync_obj.skippedQueue.append(detection) self.pbar.write(f"\nSkipping {detection.name} since it is status: {detection.status}\n") continue @@ -462,13 +470,12 @@ def test_detection(self, detection: Detection): # run unit test self.execute_unit_test(detection, test_group.unit_test, setup_results) - # run integration test (if enabled) - if self.global_config.enable_integration_testing: - self.execute_integration_test( - detection, - test_group.integration_test, - setup_results - ) + # run integration test + self.execute_integration_test( + detection, + test_group.integration_test, + setup_results + ) # cleanup cleanup_results = self.cleanup_test_group(test_group, setup_results.start_time) @@ -484,7 +491,7 @@ def test_detection(self, detection: Detection): test_group.integration_test.result.duration + setup_results.duration + cleanup_results.duration, 2 ) - + self.pbar.write(f"duration: {test_group.unit_test.result.duration}") self.pbar.write(f"setup_results.start_time: {setup_results.start_time}") @@ -601,6 +608,20 @@ def execute_unit_test( # Capture unit test start time test_start_time = time.time() + # First, check to see if this test has been skipped; log and return if so + if test.result is not None and test.result.status == TestResultStatus.SKIP: + # report the skip to the CLI + self.pbar.write( + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + FinalTestingStates.SKIP.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + return + # Reset the pbar and print that we are beginning a unit test self.pbar.reset() self.format_pbar_string( @@ -724,7 +745,7 @@ def execute_unit_test( def execute_integration_test( self, detection: Detection, - test: UnitTest, + test: IntegrationTest, setup_results: SetupTestGroupResults, FORCE_ALL_TIME: bool = True ): @@ -732,7 +753,94 @@ def execute_integration_test( Executes an integration test on the detection :param detection: the detection on which to run the test """ - raise NotImplementedError + # Capture unit test start time + test_start_time = time.time() + + # First, check to see if this test 
has been skipped; log and return if so + if test.result is not None and test.result.status == TestResultStatus.SKIP: + # report the skip to the CLI + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.SKIP.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + return + + # Reset the pbar and print that we are beginning a unit test + self.pbar.reset() + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + TestingStates.BEGINNING_TEST, + test_start_time, + ) + + try: + raise NotImplementedError + except NotImplementedError: + test.result = IntegrationTestResult() + test.result.success = False + + # Pause here if the terminate flag has NOT been set AND either of the below are true: + # 1. the behavior is always_pause + # 2. the behavior is pause_on_failure and the test failed + if ( + self.global_config.post_test_behavior == PostTestBehavior.always_pause + or ( + self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure + and (test.result is None or test.result.success is False) + ) + ) and not self.sync_obj.terminate: + # Determine the state to report to the user + if test.result is None: + res = "ERROR" + # link = detection.search + else: + res = test.result.success + if res: + res = "PASS" + else: + res = "FAIL" + # link = test.result.get_summary_dict()["sid_link"] + + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + f"{res} (CTRL+D to continue)", + test_start_time, + ) + + # Wait for user input + try: + _ = input() + except Exception: + pass + + if test.result is not None and test.result.success: + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.PASS.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + else: + # report the failure to the CLI + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.FAIL.value, + start_time=test_start_time, + set_pbar=False, + ) + ) def retry_search_until_timeout( self, diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index 7d03f8cb..f8c66b50 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -1,8 +1,9 @@ -from pydantic import BaseModel import abc -from typing import Callable -from contentctl.objects.test_config import TestConfig import datetime + +from pydantic import BaseModel + +from contentctl.objects.test_config import TestConfig from contentctl.actions.detection_testing.infrastructures.DetectionTestingInfrastructure import ( DetectionTestingManagerOutputDto, ) @@ -10,6 +11,7 @@ from contentctl.objects.enums import DetectionStatus +# TODO: adjust the view to report from test_groups instead of tests class DetectionTestingView(BaseModel, abc.ABC): config: TestConfig sync_obj: DetectionTestingManagerOutputDto @@ -40,7 +42,7 @@ def getCurrent(self) -> list[str]: ] def getRuntime(self) -> datetime.timedelta: - if self.sync_obj.start_time == None: + if self.sync_obj.start_time is None: raise Exception("Unknown Time") runtime = datetime.datetime.now() - self.sync_obj.start_time runtime -= datetime.timedelta(microseconds=runtime.microseconds) @@ -64,47 +66,65 @@ def getETA(self) -> 
datetime.timedelta: remaining_time -= datetime.timedelta( microseconds=remaining_time.microseconds ) - except: + except Exception: raise Exception("Unknown ETA") return remaining_time def getSummaryObject( self, - test_model_fields: list[str] = ["success", "message", "exception"], + test_result_fields: list[str] = ["success", "message", "exception", "status", "duration"], test_job_fields: list[str] = ["resultCount", "runDuration"], ) -> dict: - total_untested = len(self.sync_obj.inputQueue) - + """ + Iterates over detections, consolidating results into a single dict and aggregating metrics + :param test_result_fields: fields to pull from the test result + :param test_job_fields: fields to pull from the job content of the test result + :returns: summary dict + """ + # Init the list of tested detections, and some metrics aggregate counters tested_detections = [] total_pass = 0 total_fail = 0 + + # Iterate the detections tested (anything in the output queue was tested) for detection in self.sync_obj.outputQueue: + # Get a summary dict of the testing of the detection summary = detection.get_summary( - test_job_fields=test_job_fields, test_model_fields=test_model_fields + test_job_fields=test_job_fields, test_result_fields=test_result_fields ) - if summary["success"] == True: + + # Aggregate detection pass/fail metrics + if summary["success"] is True: total_pass += 1 else: total_fail += 1 + + # Append to our list tested_detections.append(summary) - # All failures appear first + + # Sort s.t. all failures appear first (then by name) tested_detections.sort(key=lambda x: (x["success"], x["name"])) + # Aggregate summaries for the untested detections (anything still in the input queue was untested) + total_untested = len(self.sync_obj.inputQueue) untested_detections = [] for detection in self.sync_obj.inputQueue: untested_detections.append(detection.get_summary()) - # All failures appear first - untested_detections.sort(key=lambda x: x["name"]) - experimental_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.experimental.value]) - deprecated_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.deprecated.value]) + # Sort by detection name + untested_detections.sort(key=lambda x: x["name"]) + # Get lists of detections (name only) that were skipped due to their status (experimental or deprecated) + experimental_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.experimental.value]) # noqa: E501 + deprecated_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.deprecated.value]) # noqa: E501 + # If any detection failed, the overall success is False if (total_fail + len(untested_detections)) == 0: overall_success = True else: overall_success = False + # Compute the percentage of completion for testing, as well as the success rate percent_complete = Utils.getPercent( len(tested_detections), len(untested_detections), 1 ) @@ -112,6 +132,7 @@ def getSummaryObject( total_pass, total_fail + total_pass + total_untested, 1 ) + # Construct and return the larger results dict result_dict = { "summary": { "success": overall_success, diff --git a/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py b/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py index 32bc1afc..fe1289ae 100644 --- 
a/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py @@ -137,7 +137,7 @@ def showStatus(self, interval: int = 60): jinja2_template = jinja2.Environment().from_string(STATUS_TEMPLATE) summary_dict = self.getSummaryObject( - test_model_fields=["success", "message", "sid_link"] + test_result_fields=["success", "message", "sid_link"] ) res = jinja2_template.render( diff --git a/contentctl/contentctl.py b/contentctl/contentctl.py index fd95b51b..60895849 100644 --- a/contentctl/contentctl.py +++ b/contentctl/contentctl.py @@ -169,6 +169,7 @@ def test(args: argparse.Namespace): config.test.detections_list = args.detections_list if args.enable_integration_testing or config.test.enable_integration_testing: config.test.enable_integration_testing = True + # TODO: add setting to skip listing skips # validate and setup according to infrastructure type if config.test.infrastructure_config.infrastructure_type == DetectionTestingTargetInfrastructure.container: @@ -569,7 +570,6 @@ def main(): "please review https://dev.splunk.com/enterprise/reference/appinspect/appinspectapiepref" ) ) - # TODO: add this flag to the config? test_parser.add_argument( "--enable_integration_testing", required=False, @@ -577,6 +577,7 @@ def main(): help="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk " "instance with ES installed)" ) + # TODO: add setting to skip listing skips test_parser.set_defaults(func=test) diff --git a/contentctl/input/detection_builder.py b/contentctl/input/detection_builder.py index 163654a2..7ad14169 100644 --- a/contentctl/input/detection_builder.py +++ b/contentctl/input/detection_builder.py @@ -10,6 +10,7 @@ from contentctl.objects.macro import Macro from contentctl.objects.lookup import Lookup from contentctl.objects.mitre_attack_enrichment import MitreAttackEnrichment +from contentctl.objects.integration_test import IntegrationTest from contentctl.enrichments.cve_enrichment import CveEnrichment from contentctl.enrichments.splunk_app_enrichment import SplunkAppEnrichment from contentctl.objects.config import ConfigDetectionConfiguration @@ -280,6 +281,20 @@ def addDatamodel(self) -> None: if data_model in self.security_content_obj.search: self.security_content_obj.datamodel.append(data_model) + def skipIntegrationTests(self) -> None: + """ + Skip all integration tests + """ + # Sanity check for typing and in setObject wasn't called yet + if self.security_content_obj is not None and isinstance(self.security_content_obj, Detection): + for test in self.security_content_obj.tests: + if isinstance(test, IntegrationTest): + test.skip("Skipping all integration tests") + else: + raise ValueError( + "security_content_obj must be an instance of Detection to skip integration tests, " + f"not {type(self.security_content_obj)}" + ) def reset(self) -> None: self.security_content_obj = None diff --git a/contentctl/input/director.py b/contentctl/input/director.py index c4669078..949bb2ff 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -216,6 +216,10 @@ def constructDetection(self, builder: DetectionBuilder, file_path: str) -> None: if self.input_dto.config.enrichments.splunk_app_enrichment: builder.addSplunkApp() + # Skip all integration tests if configured to do so + if not self.input_dto.config.test.enable_integration_testing: + builder.skipIntegrationTests() + def constructSSADetection(self, builder: DetectionBuilder, file_path: 
str) -> None: builder.reset() diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index f9410008..a0587fc8 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -12,6 +12,7 @@ from contentctl.objects.detection_tags import DetectionTags from contentctl.objects.config import ConfigDetectionConfiguration from contentctl.objects.unit_test import UnitTest +from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.macro import Macro from contentctl.objects.lookup import Lookup from contentctl.objects.baseline import Baseline @@ -37,7 +38,7 @@ class Detection_Abstract(SecurityContentObject): # TODO: consider restructuring this as a set of associated tests; each test should be an individual case, but 2 # tests may rely on the same data and thus it doesn't make sense to replay the data twice; similarly, it doesn't # make sense to run an integration test if the unit test failed. Currently, IntegrationTest is a field in UnitTest - tests: list[UnitTest] = [] + tests: list[Union[UnitTest, IntegrationTest]] = [] # enrichments datamodel: list = None @@ -194,27 +195,50 @@ def all_tests_successful(self) -> bool: def get_summary( self, detection_fields: list[str] = ["name", "search"], - test_model_fields: list[str] = ["success", "message", "exception"], + test_result_fields: list[str] = ["success", "message", "exception", "status", "duration"], test_job_fields: list[str] = ["resultCount", "runDuration"], ) -> dict: + """ + Aggregates a dictionary summarizing the detection model, including all test results + :param detection_fields: the fields of the top level detection to gather + :param test_result_fields: the fields of the test result(s) to gather + :param test_job_fields: the fields of the test result(s) job content to gather + :returns: a dict summary + """ + # Init the summary dict summary_dict = {} + + # Grab the top level detection fields for field in detection_fields: summary_dict[field] = getattr(self, field) + + # Set success based on whether all tests passed summary_dict["success"] = self.all_tests_successful() + + # Aggregate test results summary_dict["tests"] = [] for test in self.tests: - result: dict[str, Union[str, bool]] = {"name": test.name} + # Initialize the dict as a mapping of strings to str/bool + result: dict[str, Union[str, bool]] = { + "name": test.name, + "test_type": test.test_type.value + } + + # If result is not None, get a summary of the test result w/ the requested fields if test.result is not None: result.update( test.result.get_summary_dict( - model_fields=test_model_fields, + model_fields=test_result_fields, job_fields=test_job_fields, ) ) else: + # If no test result, consider it a failure result["success"] = False result["message"] = "RESULT WAS NONE" + # Add the result to our list summary_dict["tests"].append(result) + # Return the summary return summary_dict diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index c9221630..745b88d1 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -1,5 +1,6 @@ from enum import Enum from typing import Union +from abc import ABC, abstractmethod from pydantic import BaseModel @@ -13,8 +14,11 @@ class TestType(str, Enum): UNIT = "unit" INTEGRATION = "integration" + def __str__(self) -> str: + return self.value -class 
BaseTest(BaseModel): + +class BaseTest(BaseModel, ABC): """ A test case for a detection """ @@ -32,3 +36,7 @@ class BaseTest(BaseModel): # The test result result: Union[None, BaseTestResult] = None + + @abstractmethod + def skip(self) -> None: + pass diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index 88e5dd08..592e8f12 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -1,6 +1,28 @@ +from typing import Union +from enum import Enum + from pydantic import BaseModel +from splunklib.data import Record -from typing import Union +from contentctl.helper.utils import Utils + + +class TestResultStatus(str, Enum): + """Enum for test status (e.g. pass/fail)""" + # Test failed (detection did NOT fire appropriately) + FAIL = "fail" + + # Test passed (detection fired appropriately) + PASS = "pass" + + # Test skipped (nothing testable at present) + SKIP = "skip" + + # Error/exception encountered during testing (e.g. network error) + ERROR = "error" + + def __str__(self) -> str: + return self.value class BaseTestResult(BaseModel): @@ -8,7 +30,55 @@ class BaseTestResult(BaseModel): exception: Union[Exception, None] = None success: bool = False duration: float = 0 + job_content: Union[Record, None] = None + status: Union[TestResultStatus, None] = None class Config: validate_assignment = True arbitrary_types_allowed = True + + def get_summary_dict( + self, + model_fields: list[str] = [ + "success", "exception", "message", "sid_link", "status", "duration" + ], + job_fields: list[str] = ["search", "resultCount", "runDuration"], + ) -> dict: + """ + Aggregates a dictionary summarizing the test result model + :param model_fields: the fields of the test result to gather + :param job_fields: the fields of the job content to gather + :returns: a dict summary + """ + # Init the summary dict + summary_dict = {} + + # Grab the fields required + for field in model_fields: + if getattr(self, field) is not None: + # Exceptions and enums cannot be serialized, so convert to str + if isinstance(getattr(self, field), Exception): + summary_dict[field] = str(getattr(self, field)) + elif isinstance(getattr(self, field), Enum): + summary_dict[field] = str(getattr(self, field)) + else: + summary_dict[field] = getattr(self, field) + + # Grab the job content fields required + for field in job_fields: + if self.job_content is not None: + value = self.job_content.get(field, None) + + # convert runDuration to a fixed width string representation of a float + if field == "runDuration": + try: + value = Utils.getFixedWidth(float(value), 3) + except Exception: + value = Utils.getFixedWidth(0, 3) + summary_dict[field] = value + else: + # If no job content, set all fields to None + summary_dict[field] = None + + # Return the summary_dict + return summary_dict diff --git a/contentctl/objects/detection.py b/contentctl/objects/detection.py index 332ceffe..e8dab83c 100644 --- a/contentctl/objects/detection.py +++ b/contentctl/objects/detection.py @@ -35,5 +35,10 @@ def validate_test_groups(cls, value, values) -> Union[list[TestGroup], None]: # iterate over the unit tests and create a TestGroup (and as a result, an IntegrationTest) for each test_groups: list[TestGroup] = [] for unit_test in values["tests"]: - test_groups.append(TestGroup.derive_from_unit_test(unit_test, values["name"])) + test_group = TestGroup.derive_from_unit_test(unit_test, values["name"]) + test_groups.append(test_group) + + # now add each integration test to the list of tests + 
for test_group in test_groups: + values["tests"].append(test_group.integration_test) return test_groups diff --git a/contentctl/objects/integration_test.py b/contentctl/objects/integration_test.py index 34d1f634..d3249b32 100644 --- a/contentctl/objects/integration_test.py +++ b/contentctl/objects/integration_test.py @@ -5,6 +5,7 @@ from contentctl.objects.base_test import BaseTest, TestType from contentctl.objects.unit_test import UnitTest from contentctl.objects.integration_test_result import IntegrationTestResult +from contentctl.objects.base_test_result import TestResultStatus class IntegrationTest(BaseTest): @@ -30,3 +31,13 @@ def derive_from_unit_test(cls, unit_test: UnitTest) -> Self: earliest_time=unit_test.earliest_time, latest_time=unit_test.latest_time, ) + + def skip(self, message: str) -> None: + """ + Skip the test by setting its result status + :param message: the reason for skipping + """ + self.result = IntegrationTestResult( + message=message, + status=TestResultStatus.SKIP + ) diff --git a/contentctl/objects/unit_test.py b/contentctl/objects/unit_test.py index 9f448203..f10d622f 100644 --- a/contentctl/objects/unit_test.py +++ b/contentctl/objects/unit_test.py @@ -10,6 +10,7 @@ from contentctl.objects.unit_test_attack_data import UnitTestAttackData from contentctl.objects.unit_test_result import UnitTestResult from contentctl.objects.base_test import BaseTest, TestType +from contentctl.objects.base_test_result import TestResultStatus class UnitTest(BaseTest): @@ -21,7 +22,7 @@ class UnitTest(BaseTest): # The test type (unit) test_type: TestType = Field(TestType.UNIT, const=True) - # The condition to check if the searh was successful + # The condition to check if the search was successful pass_condition: Union[str, None] = None # Baselines to be run before a unit test @@ -32,3 +33,13 @@ class UnitTest(BaseTest): # The result of the unit test result: Union[None, UnitTestResult] = None + + def skip(self, message: str) -> None: + """ + Skip the test by setting its result status + :param message: the reason for skipping + """ + self.result = UnitTestResult( + message=message, + status=TestResultStatus.SKIP + ) diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py index 0769bbfd..814eeb85 100644 --- a/contentctl/objects/unit_test_result.py +++ b/contentctl/objects/unit_test_result.py @@ -3,8 +3,7 @@ from splunklib.data import Record from contentctl.objects.test_config import Infrastructure -from contentctl.helper.utils import Utils -from contentctl.objects.base_test_result import BaseTestResult +from contentctl.objects.base_test_result import BaseTestResult, TestResultStatus FORCE_TEST_FAILURE_FOR_MISSING_OBSERVABLE = False @@ -13,38 +12,9 @@ class UnitTestResult(BaseTestResult): - job_content: Union[Record, None] = None missing_observables: list[str] = [] sid_link: Union[None, str] = None - def get_summary_dict( - self, - model_fields: list[str] = ["success", "exception", "message", "sid_link"], - job_fields: list[str] = ["search", "resultCount", "runDuration"], - ) -> dict: - results_dict = {} - for field in model_fields: - if getattr(self, field) is not None: - if isinstance(getattr(self, field), Exception): - # Exception cannot be serialized, so convert to str - results_dict[field] = str(getattr(self, field)) - else: - results_dict[field] = getattr(self, field) - - for field in job_fields: - if self.job_content is not None: - value = self.job_content.get(field, None) - if field == "runDuration": - try: - value = 
Utils.getFixedWidth(float(value), 3) - except Exception as e: - value = Utils.getFixedWidth(0, 3) - results_dict[field] = value - else: - results_dict[field] = None - - return results_dict - def set_job_content( self, content: Union[Record, None], @@ -61,12 +31,11 @@ def set_job_content( # Set the job content, if given if content is not None: self.job_content = content - + if success: self.message = "TEST PASSED" else: self.message = "TEST FAILED" - if not config.instance_address.startswith("http://"): sid_template = f"http://{SID_TEMPLATE}" @@ -85,6 +54,13 @@ def set_job_content( self.message = f"Error during test: {str(content)}" self.sid_link = NO_SID - return self.success + # Set status if the test was not already skipped + if self.status != TestResultStatus.SKIP: + if self.exception is not None: + self.status = TestResultStatus.ERROR + elif not self.success: + self.status = TestResultStatus.FAIL + else: + self.status = TestResultStatus.PASS - \ No newline at end of file + return self.success From 7accc8b53053b252f4898d74c6cf1789afdd4e2a Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Nov 2023 17:22:00 -0800 Subject: [PATCH 06/22] breaking untested and failed into separate metrics; fixing success computation to not include skipped tests --- .../detection_testing/views/DetectionTestingView.py | 10 +++++++--- contentctl/actions/test.py | 5 ++++- .../detection_abstract.py | 6 ++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index f8c66b50..9bbf4cf2 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -124,21 +124,25 @@ def getSummaryObject( else: overall_success = False + # Compute total detections + total_detections = total_fail + total_pass + total_untested + # Compute the percentage of completion for testing, as well as the success rate percent_complete = Utils.getPercent( len(tested_detections), len(untested_detections), 1 ) success_rate = Utils.getPercent( - total_pass, total_fail + total_pass + total_untested, 1 + total_pass, total_detections, 1 ) # Construct and return the larger results dict result_dict = { "summary": { "success": overall_success, - "total_detections": total_pass + total_fail, + "total_detections": total_detections, "total_pass": total_pass, - "total_fail_or_untested": total_fail + total_untested, + "total_fail": total_fail, + "total_untested": total_untested, "total_experimental_or_deprecated": len(deprecated_detections+experimental_detections), "success_rate": success_rate, }, diff --git a/contentctl/actions/test.py b/contentctl/actions/test.py index b9c51407..db48f4c2 100644 --- a/contentctl/actions/test.py +++ b/contentctl/actions/test.py @@ -88,7 +88,10 @@ def execute(self, input_dto: TestInputDto) -> bool: f"\tPassed Detections : {summary.get('total_pass','ERROR')}" ) print( - f"\tFailed or Untested Detections: {summary.get('total_fail_or_untested','ERROR')}" + f"\tFailed Detections : {summary.get('total_fail','ERROR')}" + ) + print( + f"\tUntested Detections : {summary.get('total_untested','ERROR')}" ) print(f"\tTest Results File : {file.getOutputFilePath()}") return summary_results.get("summary", {}).get("success", False) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 
a0587fc8..54f5e5c2 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -13,6 +13,7 @@ from contentctl.objects.config import ConfigDetectionConfiguration from contentctl.objects.unit_test import UnitTest from contentctl.objects.integration_test import IntegrationTest +from contentctl.objects.base_test_result import TestResultStatus from contentctl.objects.macro import Macro from contentctl.objects.lookup import Lookup from contentctl.objects.baseline import Baseline @@ -188,6 +189,11 @@ def all_tests_successful(self) -> bool: if len(self.tests) == 0: return False for test in self.tests: + # Ignore any skipped tests + if (test.result is not None) and (test.result.status == TestResultStatus.SKIP): + continue + + # If any result is missing or if any has a failure, the return False if test.result is None or test.result.success is False: return False return True From bf2660dba25ba4f100b204f99e81703d16b7582a Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Nov 2023 18:02:52 -0800 Subject: [PATCH 07/22] added logic to ignore skipped tests when computing detection pass/fail; added logic to full skip data replay if all tests in a test_group have been skipped --- .../DetectionTestingInfrastructure.py | 26 ++++++++++++--- .../detection_abstract.py | 1 + contentctl/objects/test_group.py | 32 +++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 0054a408..1a3421d3 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -459,6 +459,20 @@ def test_detection(self, detection: Detection): # iterate TestGroups for test_group in detection.test_groups: + test_group.unit_test.skip("skip") + # If all tests in the group have been skipped, report and continue + if test_group.all_tests_skipped(): + self.pbar.write( + self.format_pbar_string( + TestReportingType.GROUP, + test_group.name, + FinalTestingStates.SKIP.value, + time.time(), + set_pbar=False, + ) + ) + continue + # replay attack_data setup_results = self.setup_test_group(test_group) @@ -480,20 +494,22 @@ def test_detection(self, detection: Detection): # cleanup cleanup_results = self.cleanup_test_group(test_group, setup_results.start_time) - # update the results duration w/ the setup/cleanup time - if test_group.unit_test.result is not None: + # update the results duration w/ the setup/cleanup time (for those not skipped) + if (test_group.unit_test.result is not None) and (not test_group.unit_test_skipped()): test_group.unit_test.result.duration = round( test_group.unit_test.result.duration + setup_results.duration + cleanup_results.duration, 2 ) - if test_group.integration_test.result is not None: + if (test_group.integration_test.result is not None) and (not test_group.integration_test_skipped()): test_group.integration_test.result.duration = round( test_group.integration_test.result.duration + setup_results.duration + cleanup_results.duration, 2 ) - self.pbar.write(f"duration: {test_group.unit_test.result.duration}") - self.pbar.write(f"setup_results.start_time: {setup_results.start_time}") + self.pbar.write(f"unit_test.duration: {test_group.unit_test.result.duration}") + 
self.pbar.write(f"integration_test.duration: {test_group.integration_test.result.duration}") + self.pbar.write(f"setup_results.duration: {setup_results.duration}") + self.pbar.write(f"cleanup_results.duration: {cleanup_results.duration}") # Write test group status self.pbar.write( diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 54f5e5c2..0ae85124 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -185,6 +185,7 @@ def datamodel_valid(cls, v, values): raise ValueError("not valid data model: " + values["name"]) return v + # TODO: if all of a detections tests are skipped, this will return True (is this desirable?) def all_tests_successful(self) -> bool: if len(self.tests) == 0: return False diff --git a/contentctl/objects/test_group.py b/contentctl/objects/test_group.py index d8202a75..2d80fe3f 100644 --- a/contentctl/objects/test_group.py +++ b/contentctl/objects/test_group.py @@ -5,6 +5,7 @@ from contentctl.objects.unit_test import UnitTest from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.unit_test_attack_data import UnitTestAttackData +from contentctl.objects.base_test_result import TestResultStatus class TestGroup(BaseModel): @@ -35,3 +36,34 @@ def derive_from_unit_test(cls, unit_test: UnitTest, name_prefix: str) -> Self: integration_test=integration_test, attack_data=unit_test.attack_data ) + + def unit_test_skipped(self) -> bool: + """ + Returns true if the unit test has been skipped + :returns: bool + """ + # Return True if skipped + if self.unit_test.result is not None: + return self.unit_test.result.status == TestResultStatus.SKIP + + # If no result yet, it has not been skipped + return False + + def integration_test_skipped(self) -> bool: + """ + Returns true if the integration test has been skipped + :returns: bool + """ + # Return True if skipped + if self.integration_test.result is not None: + return self.integration_test.result.status == TestResultStatus.SKIP + + # If no result yet, it has not been skipped + return False + + def all_tests_skipped(self) -> bool: + """ + Returns true if both the unit test and integration test have been skipped + :returns: bool + """ + return self.unit_test_skipped() and self.integration_test_skipped() From a2534b8919ddf7c62f2b0ba451a0777bbcf1a3a3 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Nov 2023 18:03:20 -0800 Subject: [PATCH 08/22] removing some test code --- .../infrastructures/DetectionTestingInfrastructure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 1a3421d3..2792f9e8 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -459,7 +459,6 @@ def test_detection(self, detection: Detection): # iterate TestGroups for test_group in detection.test_groups: - test_group.unit_test.skip("skip") # If all tests in the group have been skipped, report and continue if test_group.all_tests_skipped(): self.pbar.write( From 26af3e454ce6f3a1acb8eb0df80474244290c616 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 21 Nov 2023 23:07:37 
-0800 Subject: [PATCH 09/22] adding integration testing; works, needs cleanup/refinement --- .../DetectionTestingManager.py | 1 + .../DetectionTestingInfrastructure.py | 207 +++-- .../actions/detection_testing/progress_bar.py | 103 +++ .../views/DetectionTestingView.py | 2 +- .../detection_abstract.py | 2 +- contentctl/objects/base_test.py | 4 +- contentctl/objects/base_test_result.py | 38 +- contentctl/objects/correlation_search.py | 820 ++++++++++++++++++ contentctl/objects/integration_test_result.py | 5 +- contentctl/objects/notable_action.py | 36 + contentctl/objects/risk_analysis_action.py | 70 ++ contentctl/objects/risk_object.py | 18 + contentctl/objects/test_config.py | 1 + contentctl/objects/threat_object.py | 15 + contentctl/objects/unit_test_result.py | 1 - 15 files changed, 1233 insertions(+), 90 deletions(-) create mode 100644 contentctl/actions/detection_testing/progress_bar.py create mode 100644 contentctl/objects/correlation_search.py create mode 100644 contentctl/objects/notable_action.py create mode 100644 contentctl/objects/risk_analysis_action.py create mode 100644 contentctl/objects/risk_object.py create mode 100644 contentctl/objects/threat_object.py diff --git a/contentctl/actions/detection_testing/DetectionTestingManager.py b/contentctl/actions/detection_testing/DetectionTestingManager.py index 70f9f2e1..2b4f2c24 100644 --- a/contentctl/actions/detection_testing/DetectionTestingManager.py +++ b/contentctl/actions/detection_testing/DetectionTestingManager.py @@ -128,6 +128,7 @@ def sigint_handler(signum, frame): except Exception as e: self.output_dto.terminate = True print(f"Error running in container: {str(e)}") + raise e self.output_dto.terminate = True diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 2792f9e8..c3efe29d 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -13,7 +13,6 @@ from dataclasses import dataclass from shutil import copyfile from typing import Union -from enum import Enum from pydantic import BaseModel, PrivateAttr, Field import requests @@ -34,63 +33,39 @@ from contentctl.objects.test_config import TestConfig, Infrastructure from contentctl.objects.test_group import TestGroup from contentctl.objects.base_test_result import TestResultStatus +from contentctl.objects.correlation_search import CorrelationSearch, PbarData from contentctl.helper.utils import Utils from contentctl.actions.detection_testing.DataManipulation import ( DataManipulation, ) +from contentctl.actions.detection_testing.progress_bar import ( + format_pbar_string, + TestReportingType, + FinalTestingStates, + TestingStates +) -class TestReportingType(str, Enum): - """ - 5-char identifiers for the type of testing being reported on - """ - # Reporting around general testing setup (e.g. 
infra, role configuration) - SETUP = "SETUP" - - # Reporting around a group of tests - GROUP = "GROUP" - - # Reporting around a unit test - UNIT = "UNIT " - - # Reporting around an integration test - INTEGRATION = "INTEG" - - -class TestingStates(str, Enum): - """ - Defined testing states - """ - BEGINNING_GROUP = "Beginning Test Group" - BEGINNING_TEST = "Beginning Test" - DOWNLOADING = "Downloading Data" - REPLAYING = "Replaying Data" - PROCESSING = "Waiting for Processing" - SEARCHING = "Running Search" - DELETING = "Deleting Data" - DONE_GROUP = "Test Group Done" - - -# the longest length of any state -LONGEST_STATE = max(len(w.value) for w in TestingStates) - - -class FinalTestingStates(str, Enum): - """ - The possible final states for a test (for pbar reporting) - """ - FAIL = "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m" - PASS = "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m" - SKIP = "\x1b[0;30;47m" + "SKIP".ljust(LONGEST_STATE) + "\x1b[0m" - - -# max length of a test name -# TODO: this max size is declared, and it is used appropriatel w/ .ljust, but nothing truncates test names to makes -# them the appropriate size -MAX_TEST_NAME_LENGTH = 70 - -# The format string used for pbar reporting -PBAR_FORMAT_STRING = "[{test_reporting_type}] {test_name} >> {state} | Time: {time}" +# TODO: cleanup extra prints/logs, add docstrings/comments +# TODO: test known failures/successes +# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly +# TODO: SKIP/FAIL integration test on unit test fail/error (and SKIP?) +# TODO: consolidate logic around success vs. status across unit and integration tests +# TODO: validate duration, wait_duration, runDuration +# TODO: consider renaming duration, wait_duration for clarity +# TODO: add flag for enabling logging for correlation_search logging (?) +# TODO: add flag for changing max_sleep time for integration tests +# TODO: update extractor to work w/ new fields in output +# TODO: look into getting better time elapsed updates on pbar statuses +# TODO: add stats around total test cases and unit/integration test sucess/failure? maybe configurable reporting? +# TODO: do a full package test +# TODO: look into how index/risk index clearing surfaces to contentctl as an error (if at all) +# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the +# completed/elapsed counter) +# TODO: investigate failures (true, transient, latency?) +# TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long +# cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... 
we shouldn't be +# testing on Correlation type detections anyway class SetupTestGroupResults(BaseModel): @@ -142,6 +117,7 @@ class Config: def __init__(self, **data): super().__init__(**data) + # TODO: why not use @abstractmethod def start(self): raise ( NotImplementedError( @@ -149,6 +125,7 @@ def start(self): ) ) + # TODO: why not use @abstractmethod def get_name(self) -> str: raise ( NotImplementedError( @@ -427,8 +404,8 @@ def execute(self): detection = self.sync_obj.inputQueue.pop() if detection.status != DetectionStatus.production.value: # NOTE: we handle skipping entire detections differently than we do skipping individual test cases; - # we skip entire detections by excluding them to an entirely separate queue, while we skip - # individual test cases via the BaseTest.skip() method, such as when we are skipping all + # we skip entire detections by excluding them to an entirely separate queue, while we skip + # individual test cases via the BaseTest.skip() method, such as when we are skipping all # integration tests (see DetectionBuilder.skipIntegrationTests) self.sync_obj.skippedQueue.append(detection) self.pbar.write(f"\nSkipping {detection.name} since it is status: {detection.status}\n") @@ -448,6 +425,7 @@ def execute(self): return except Exception as e: self.pbar.write(f"Error testing detection: {type(e).__name__}: {str(e)}") + raise e finally: self.sync_obj.outputQueue.append(detection) self.sync_obj.currentTestingQueue[self.get_name()] = None @@ -483,6 +461,7 @@ def test_detection(self, detection: Detection): # run unit test self.execute_unit_test(detection, test_group.unit_test, setup_results) + # TODO: if unit test fails, SKIP or FAIL the integration test # run integration test self.execute_integration_test( detection, @@ -583,26 +562,39 @@ def format_pbar_string( set_pbar: bool = True, update_sync_status: bool = False, ) -> str: + """ + Instance specific function to log testing information via pbar; returns a formatted string + that can be written and optionally updates the existing progress bar + :param test_reporting_type: the type of reporting to be done (e.g. 
unit, integration, group) + :param test_name: the name of the test to be logged + :param state: the state/message of the test to be logged + :param start_time: the start_time of this progres bar + :param set_pbar: bool indicating whether pbar.update should be called + :param update_sync_status: bool indicating whether a sync status update should be queued + :returns: a formatted string for use w/ pbar + """ + # set start time id not provided if start_time is None: start_time = self.start_time - field_one = test_reporting_type.value - field_two = test_name.ljust(MAX_TEST_NAME_LENGTH) - field_three = state.ljust(LONGEST_STATE) - field_four = datetime.timedelta(seconds=round(time.time() - start_time)) - new_string = PBAR_FORMAT_STRING.format( - test_reporting_type=field_one, - test_name=field_two, - state=field_three, - time=field_four, + + # invoke the helper method + new_string = format_pbar_string( + self.pbar, + test_reporting_type, + test_name, + state, + start_time, + set_pbar ) - if set_pbar: - self.pbar.bar_format = new_string - self.pbar.update() + + # update sync status if needed if update_sync_status: self.sync_obj.currentTestingQueue[self.get_name()] = { "name": state, "search": "N/A", } + + # return the formatted string return new_string def execute_unit_test( @@ -762,7 +754,6 @@ def execute_integration_test( detection: Detection, test: IntegrationTest, setup_results: SetupTestGroupResults, - FORCE_ALL_TIME: bool = True ): """ Executes an integration test on the detection @@ -785,7 +776,7 @@ def execute_integration_test( ) return - # Reset the pbar and print that we are beginning a unit test + # Reset the pbar and print that we are beginning an integration test self.pbar.reset() self.format_pbar_string( TestReportingType.INTEGRATION, @@ -794,11 +785,54 @@ def execute_integration_test( test_start_time, ) + # if the replay failed, record the test failure and return + if not setup_results.success: + status: TestResultStatus + if setup_results.exception is not None: + status = TestResultStatus.ERROR + else: + status = TestResultStatus.FAIL + test.result = IntegrationTestResult( + message="TEST FAILED: something went wrong during during TestGroup setup (e.g. attack data replay)", + exception=setup_results.exception, + duration=round(time.time() - test_start_time, 2), + status=status + ) + + # report the failure to the CLI + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.FAIL.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + + return + + # Run the test try: - raise NotImplementedError - except NotImplementedError: - test.result = IntegrationTestResult() - test.result.success = False + pbar_data = PbarData( + pbar=self.pbar, + fq_test_name=f"{detection.name}:{test.name}", + start_time=test_start_time + ) + correlation_search = CorrelationSearch( + detection_name=detection.name, + service=self.get_conn(), + pbar_data=pbar_data, + ) + # TODO: add max_sleep as a configurable value on cli/config + test.result = correlation_search.test() + except Exception as e: + # Catch and report and unhandled exceptions in integration testing + test.result = IntegrationTestResult( + message="TEST FAILED: unhandled exception in CorrelationSearch", + exception=e, + status=TestResultStatus.ERROR + ) # Pause here if the terminate flag has NOT been set AND either of the below are true: # 1. 
the behavior is always_pause @@ -807,20 +841,14 @@ def execute_integration_test( self.global_config.post_test_behavior == PostTestBehavior.always_pause or ( self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure - and (test.result is None or test.result.success is False) + and (test.result is None or test.result.failed_and_complete()) ) ) and not self.sync_obj.terminate: # Determine the state to report to the user if test.result is None: res = "ERROR" - # link = detection.search else: - res = test.result.success - if res: - res = "PASS" - else: - res = "FAIL" - # link = test.result.get_summary_dict()["sid_link"] + res = test.result.status.value self.format_pbar_string( TestReportingType.INTEGRATION, @@ -835,7 +863,8 @@ def execute_integration_test( except Exception: pass - if test.result is not None and test.result.success: + # Report a pass + if (test.result is not None) and (test.result.status == TestResultStatus.PASS): self.pbar.write( self.format_pbar_string( TestReportingType.INTEGRATION, @@ -845,6 +874,17 @@ def execute_integration_test( set_pbar=False, ) ) + elif (test.result is not None) and (test.result.status == TestResultStatus.SKIP): + # Report a skip + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.SKIP.value, + start_time=test_start_time, + set_pbar=False, + ) + ) else: # report the failure to the CLI self.pbar.write( @@ -857,6 +897,11 @@ def execute_integration_test( ) ) + # Flush stdout and set duration + stdout.flush() + if test.result is not None: + test.result.duration = round(time.time() - test_start_time, 2) + def retry_search_until_timeout( self, detection: Detection, diff --git a/contentctl/actions/detection_testing/progress_bar.py b/contentctl/actions/detection_testing/progress_bar.py new file mode 100644 index 00000000..df892a9c --- /dev/null +++ b/contentctl/actions/detection_testing/progress_bar.py @@ -0,0 +1,103 @@ +import time +from enum import Enum +from tqdm import tqdm +import datetime + + +class TestReportingType(str, Enum): + """ + 5-char identifiers for the type of testing being reported on + """ + # Reporting around general testing setup (e.g. 
infra, role configuration) + SETUP = "SETUP" + + # Reporting around a group of tests + GROUP = "GROUP" + + # Reporting around a unit test + UNIT = "UNIT " + + # Reporting around an integration test + INTEGRATION = "INTEG" + + +class TestingStates(str, Enum): + """ + Defined testing states + """ + BEGINNING_GROUP = "Beginning Test Group" + BEGINNING_TEST = "Beginning Test" + DOWNLOADING = "Downloading Data" + REPLAYING = "Replaying Data" + PROCESSING = "Waiting for Processing" + SEARCHING = "Running Search" + DELETING = "Deleting Data" + DONE_GROUP = "Test Group Done" + PRE_CLEANUP = "Pre-run Cleanup" + POST_CLEANUP = "Post-run Cleanup" + FORCE_RUN = "Forcing Detection Run" + VALIDATING = "Validating Risks/Notables" + + +# the longest length of any state +LONGEST_STATE = max(len(w.value) for w in TestingStates) + + +class FinalTestingStates(str, Enum): + """ + The possible final states for a test (for pbar reporting) + """ + FAIL = "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m" + PASS = "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m" + SKIP = "\x1b[0;30;47m" + "SKIP".ljust(LONGEST_STATE) + "\x1b[0m" + + +# max length of a test name +# TODO: this max size is declared, and it is used appropriatel w/ .ljust, but nothing truncates test names to makes +# them the appropriate size +MAX_TEST_NAME_LENGTH = 70 + +# The format string used for pbar reporting +PBAR_FORMAT_STRING = "[{test_reporting_type}] {test_name} >> {state} | Time: {time}" + + +def format_pbar_string( + pbar: tqdm, + test_reporting_type: TestReportingType, + test_name: str, + state: str, + start_time: float, + set_pbar: bool = True, +) -> str: + """ + Utility function to log testing information via pbar; returns a formatted string that can be + written and optionally updates the existing progress bar + :param pbar: a tqdm instance to use for updating + :param test_reporting_type: the type of reporting to be done (e.g. 
unit, integration, group) + :param test_name: the name of the test to be logged + :param state: the state/message of the test to be logged + :param start_time: the start_time of this progres bar + :param set_pbar: bool indicating whether pbar.update should be called + :returns: a formatted string for use w/ pbar + """ + # Extract and ljust our various fields + field_one = test_reporting_type.value + field_two = test_name.ljust(MAX_TEST_NAME_LENGTH) + field_three = state.ljust(LONGEST_STATE) + field_four = datetime.timedelta(seconds=round(time.time() - start_time)) + + # Format the string + new_string = PBAR_FORMAT_STRING.format( + test_reporting_type=field_one, + test_name=field_two, + state=field_three, + time=field_four, + ) + + # Update pbar if set + if set_pbar: + pbar.bar_format = new_string + pbar.update() + + # Return formatted string + return new_string diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index 9bbf4cf2..b234910c 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -72,7 +72,7 @@ def getETA(self) -> datetime.timedelta: def getSummaryObject( self, - test_result_fields: list[str] = ["success", "message", "exception", "status", "duration"], + test_result_fields: list[str] = ["success", "message", "exception", "status", "duration", "wait_duration"], test_job_fields: list[str] = ["resultCount", "runDuration"], ) -> dict: """ diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 0ae85124..788208fa 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -202,7 +202,7 @@ def all_tests_successful(self) -> bool: def get_summary( self, detection_fields: list[str] = ["name", "search"], - test_result_fields: list[str] = ["success", "message", "exception", "status", "duration"], + test_result_fields: list[str] = ["success", "message", "exception", "status", "duration", "wait_duration"], test_job_fields: list[str] = ["resultCount", "runDuration"], ) -> dict: """ diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index 745b88d1..0df68fc7 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -39,4 +39,6 @@ class BaseTest(BaseModel, ABC): @abstractmethod def skip(self) -> None: - pass + raise NotImplementedError( + "BaseTest test is an abstract class; skip must be implemented by subclasses" + ) diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index 592e8f12..df0b7090 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -1,7 +1,7 @@ from typing import Union from enum import Enum -from pydantic import BaseModel +from pydantic import BaseModel, validator from splunklib.data import Record from contentctl.helper.utils import Utils @@ -28,19 +28,49 @@ def __str__(self) -> str: class BaseTestResult(BaseModel): message: Union[None, str] = None exception: Union[Exception, None] = None + status: Union[TestResultStatus, None] = None success: bool = False duration: float = 0 job_content: Union[Record, None] = None - status: Union[TestResultStatus, None] = None + sid_link: Union[None, str] = None class Config: 
        validate_assignment = True
        arbitrary_types_allowed = True

+    @validator("success", always=True)
+    @classmethod
+    def derive_success_from_status(cls, v, values) -> bool:
+        """
+        If a status is provided at initialization, we can derive success from it
+        """
+        print(f"*** VALUES ***: {values}")
+        if ("status" in values) and (values["status"] is not None):
+            if values["status"] == TestResultStatus.PASS:
+                return True
+            else:
+                if v is not False:
+                    raise ValueError(f"Status {values['status'].value} is not compatible with success={v}")
+                return False
+        return v
+
+    # TODO: ensure that all invocations of test results have status set AND consider if SKIP should be success as True
+    # or False
+    def failed_and_complete(self) -> bool:
+        """
+        Uses status to determine if a test was a failure; useful because success == False for a SKIP, but it is also
+        not a failure
+        :returns: bool indicating the test failed and is complete (in that it has a status)
+        """
+        if self.status is not None:
+            return self.status == TestResultStatus.FAIL or self.status == TestResultStatus.ERROR
+        return False
+
     def get_summary_dict(
         self,
         model_fields: list[str] = [
-            "success", "exception", "message", "sid_link", "status", "duration"
+            "success", "exception", "message", "sid_link", "status", "duration", "wait_duration"
         ],
         job_fields: list[str] = ["search", "resultCount", "runDuration"],
     ) -> dict:
@@ -55,7 +85,7 @@ def get_summary_dict(

         # Grab the fields required
         for field in model_fields:
-            if getattr(self, field) is not None:
+            if getattr(self, field, None) is not None:
                 # Exceptions and enums cannot be serialized, so convert to str
                 if isinstance(getattr(self, field), Exception):
                     summary_dict[field] = str(getattr(self, field))
diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py
new file mode 100644
index 00000000..3741770b
--- /dev/null
+++ b/contentctl/objects/correlation_search.py
@@ -0,0 +1,820 @@
+import logging
+import time
+from typing import Union, Optional, Any
+from enum import Enum
+
+from pydantic import BaseModel, validator, Field
+from splunklib.results import JSONResultsReader, Message  # type: ignore
+from splunklib.binding import HTTPError, ResponseReader  # type: ignore
+import splunklib.client as splunklib  # type: ignore
+from tqdm import tqdm  # type: ignore
+
+from contentctl.objects.risk_analysis_action import RiskAnalysisAction
+from contentctl.objects.notable_action import NotableAction
+from contentctl.objects.base_test_result import TestResultStatus
+from contentctl.objects.integration_test_result import IntegrationTestResult
+from contentctl.actions.detection_testing.progress_bar import (
+    format_pbar_string,
+    TestReportingType,
+    TestingStates
+)
+
+
+# Set ENABLE_LOGGING to True to enable file logging for local testing; leave it False to suppress logging in production
+ENABLE_LOGGING = True
+LOG_LEVEL = logging.DEBUG
+LOG_PATH = "correlation_search.log"
+
+
+def get_logger() -> logging.Logger:
+    """
+    Gets a logger instance for the module; logger is configured if not already configured. The
+    NullHandler is used to suppress logging when running in production so as not to conflict w/
+    contentctl's larger pbar-based logging.
The StreamHandler is enabled by setting ENABLE_LOGGING + to True (useful for debugging/testing locally) + """ + # get logger for module + logger = logging.getLogger(__name__) + logger.propagate = False + print(f"***** Got logger *** {logger}: {__name__}") + + + # if logger has no handlers, it needs to be configured for the first time + print(f"***** Has handlers *** {logger.hasHandlers()}") + print(f"***** Has handlers 2 *** {logger.handlers}") + if not logger.hasHandlers(): + print(f"***** No handlers *** {logger.handlers}") + # set level + logger.setLevel(LOG_LEVEL) + + # if logging enabled, use a StreamHandler; else, use the NullHandler to suppress logging + handler: logging.Handler + if ENABLE_LOGGING: + handler = logging.FileHandler(LOG_PATH) + else: + handler = logging.NullHandler() + print(f"***** Made handlers *** {handler}") + + # Format our output + formatter = logging.Formatter('%(asctime)s - %(levelname)s:%(name)s - %(message)s') + handler.setFormatter(formatter) + print(f"***** Made formatter *** {formatter}") + + # Set handler level and add to logger + handler.setLevel(LOG_LEVEL) + logger.addHandler(handler) + print(f"***** Added handler *** {logger.handlers}") + print(f"***** HANDLERS *** {logger.handlers}") + return logger + + +class IntegrationTestingError(Exception): + """Base exception class for integration testing""" + pass + + +class ServerError(IntegrationTestingError): + """An error encounterd during integration testing, as provided by the server (Splunk instance)""" + pass + + +class ClientError(IntegrationTestingError): + """An error encounterd during integration testing, on the client's side (locally)""" + pass + + +class SavedSearchKeys(str, Enum): + """ + Various keys into the SavedSearch content + """ + # setup the names of the keys we expect to access in content + EARLIEST_TIME_KEY = "dispatch.earliest_time" + LATEST_TIME_KEY = "dispatch.latest_time" + CRON_SCHEDULE_KEY = "cron_schedule" + RISK_ACTION_KEY = "action.risk" + NOTABLE_ACTION_KEY = "action.notable" + DISBALED_KEY = "disabled" + + +class Indexes(str, Enum): + """ + Indexes we search against + """ + # setup the names of the risk and notable indexes + RISK_INDEX = "risk" + NOTABLE_INDEX = "notable" + + +class TimeoutConfig(int, Enum): + """ + Configuration values for the exponential backoff timer + """ + # base amount to sleep for before beginning exponential backoff during testing + BASE_SLEEP = 60 + + # max amount to wait before timing out during exponential backoff + MAX_SLEEP = 210 + + +class ScheduleConfig(str, Enum): + """ + Configuraton values for the saved search schedule + """ + EARLIEST_TIME: str = "-3y@y" + LATEST_TIME: str = "-1m@m" + CRON_SCHEDULE: str = "*/1 * * * *" + + +class ResultIterator: + """An iterator wrapping the results abstractions provided by Splunk SDK + + Given a ResponseReader, constructs a JSONResultsReader and iterates over it; when Message instances are encountered, + they are logged if the message is anything other than "error", in which case an error is raised. 
Regular results are + returned as expected + :param response_reader: a ResponseReader object + :param logger: a Logger object + """ + def __init__(self, response_reader: ResponseReader) -> None: + self.results_reader: JSONResultsReader = JSONResultsReader( + response_reader) + + # get logger + self.logger: logging.Logger = get_logger() + + def __iter__(self) -> "ResultIterator": + return self + + def __next__(self) -> dict: + # Use a reader for JSON format so we can iterate over our results + for result in self.results_reader: + # log messages, or raise if error + if isinstance(result, Message): + # convert level string to level int + level_name = result.type.strip().upper() + level: int = logging.getLevelName(level_name) + + # log message at appropriate level and raise if needed + message = f"{result.type}: {result.message}" + self.logger.log(level, message) + if level == logging.ERROR: + raise ServerError(message) + + # if dict, just return + elif isinstance(result, dict): + return result + + # raise for any unexpected types + else: + raise ClientError("Unexpected result type") + + # stop iteration if we run out of things to iterate over internally + raise StopIteration + + +class PbarData(BaseModel): + """ + Simple model encapsulating a pbar instance and the data needed for logging to it + :param pbar: a tqdm instance to use for logging + :param fq_test_name: the fully qualifed (fq) test name (":") used for logging + :param start_time: the start time used for logging + """ + pbar: tqdm + fq_test_name: str + start_time: float + + class Config: + arbitrary_types_allowed = True + + +# TODO: right now, we are creating one CorrelationSearch instance for each test case; in general, there is only one +# unit test, and thus one integration test, per detection, so this is not an issue. However, if we start having many +# test cases per detection, we will be duplicating some effort & network calls that we don't need to. Condier +# refactoring in order to re-use CorrelationSearch objects across tests in such a case +class CorrelationSearch(BaseModel): + """Representation of a correlation search in Splunk + + In Enterprise Security, a correlation search is wrapper around the saved search entity. This search represents a + detection rule for our purposes. + :param detection_name: the name of the search/detection (e.g. 
"Windows Modify Registry EnableLinkedConnections") + :param service: a Service instance representing a connection to a Splunk instance + :param test_index: the index attack data is forwarded to for testing (optionally used in cleanup) + :param pbar_data: the encapsulated info needed for logging w/ pbar + """ + # our instance fields + detection_name: str + + service: splunklib.Service + pbar_data: PbarData + + # TODO: replace this w/ pbar stuff + test_index: Optional[str] = Field(default=None, min_length=1) + + logger: logging.Logger = Field(default_factory=get_logger, const=True) + + name: Optional[str] = None + splunk_path: Optional[str] = None + saved_search: Optional[splunklib.SavedSearch] = None + indexes_to_purge: set[str] = set() + + # earliest_time: Optional[str] = None + # latest_time: Optional[str] = None + # cron_schedule: Optional[str] = None + risk_analysis_action: Union[RiskAnalysisAction, None] = None + notable_action: Union[NotableAction, None] = None + + class Config: + arbitrary_types_allowed = True + + # enabled: bool = False + + @validator("name", always=True) + @classmethod + def _convert_detection_to_search_name(cls, v, values) -> str: + """ + Validate detection name and derive if None + """ + if "detection_name" not in values: + raise ValueError("detection_name missing; name is dependent on detection_name") + + expected_name = f"ESCU - {values['detection_name']} - Rule" + if v is not None and v != expected_name: + raise ValueError( + "name must be derived from detection_name; leave as None and it will be derived automatically" + ) + return f"ESCU - {values['detection_name']} - Rule" + + @validator("splunk_path", always=True) + @classmethod + def _derive_splunk_path(cls, v, values) -> str: + """ + Validate splunk_path and derive if None + """ + if "name" not in values: + raise ValueError("name missing; splunk_path is dependent on name") + + expected_path = f"saved/searches/{values['name']}" + if v is not None and v != expected_path: + raise ValueError( + "splunk_path must be derived from name; leave as None and it will be derived automatically" + ) + return f"saved/searches/{values['name']}" + + @validator("saved_search", always=True) + @classmethod + def _instantiate_saved_search(cls, v, values) -> str: + """ + Ensure saved_search was initialized as None and derive + """ + if "splunk_path" not in values or "service" not in values: + raise ValueError("splunk_path or service missing; saved_search is dependent on both") + + if v is not None: + raise ValueError( + "saved_search must be derived from the service and splunk_path; leave as None and it will be derived " + "automatically" + ) + return splunklib.SavedSearch( + values['service'], + values['splunk_path'], + ) + + # @validator("risk_analysis_action", "notable_action") + # @classmethod + # def _initialized_to_none(cls, v, values) -> None: + # """ + # Ensure a field was initialized as None + # """ + # if v is not None: + # raise ValueError("field must be initialized to None; will be derived automatically") + + @validator("risk_analysis_action", always=True) + @classmethod + def _init_risk_analysis_action(cls, v, values) -> Optional[RiskAnalysisAction]: + if "saved_search" not in values: + raise ValueError("saved_search missing; risk_analysis_action is dependent on saved_search") + + if v is not None: + raise ValueError( + "risk_analysis_action must be derived from the saved_search; leave as None and it will be derived " + "automatically" + ) + return 
CorrelationSearch._get_risk_analysis_action(values['saved_search'].content) + + @validator("notable_action", always=True) + @classmethod + def _init_notable_action(cls, v, values) -> Optional[NotableAction]: + if "saved_search" not in values: + raise ValueError("saved_search missing; notable_action is dependent on saved_search") + + if v is not None: + raise ValueError( + "notable_action must be derived from the saved_search; leave as None and it will be derived " + "automatically" + ) + return CorrelationSearch._get_notable_action(values['saved_search'].content) + + # def __init__(self, **kwargs) -> None: + # # call the parent constructor + # super().__init__(**kwargs) + + # # parse out the metadata we care about + # # TODO: ideally, we could handle this w/ a call to model_post_init, but that is a pydantic v2 feature + # # https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init + # self._parse_metadata() + + @property + def earliest_time(self) -> str: + if self.saved_search is not None: + return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY.value] + else: + raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + + @property + def latest_time(self) -> str: + if self.saved_search is not None: + return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY.value] + else: + raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + + @property + def cron_schedule(self) -> str: + if self.saved_search is not None: + return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY.value] + else: + raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + + @property + def enabled(self) -> bool: + if self.saved_search is not None: + if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY.value]): + return False + else: + return True + else: + raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + + # @property + # def risk_analysis_action(self) -> Optional[RiskAnalysisAction]: + # if self._risk_analysis_action is None: + # self._parse_risk_analysis_action() + # return self._risk_analysis_action + + # @property + # def notable_action(self) -> Optional[NotableAction]: + # if self._notable_action is None: + # self._parse_notable_action() + # return self._notable_action + + @staticmethod + def _get_risk_analysis_action(content: dict[str, Any]) -> Optional[RiskAnalysisAction]: + if int(content[SavedSearchKeys.RISK_ACTION_KEY.value]): + try: + return RiskAnalysisAction.parse_from_dict(content) + except ValueError as e: + raise ClientError(f"Error unpacking RiskAnalysisAction: {e}") + return None + + @staticmethod + def _get_notable_action(content: dict[str, Any]) -> Optional[NotableAction]: + # grab notable details if present + if int(content[SavedSearchKeys.NOTABLE_ACTION_KEY.value]): + return NotableAction.parse_from_dict(content) + return None + + def _parse_metadata(self) -> None: + """Parses the metadata we care about from self.saved_search.content + + :raises KeyError: if self.saved_search.content does not contain a required key + :raises json.JSONDecodeError: if the value at self.saved_search.content['action.risk.param._risk'] can't be + decoded from JSON into a dict + :raises IntegrationTestingError: if the value at self.saved_search.content['action.risk.param._risk'] is + unpacked to be anything other than a singleton + """ + # grab risk details if 
present + self.risk_analysis_action = CorrelationSearch._get_risk_analysis_action( + self.saved_search.content # type: ignore + ) + + # grab notable details if present + self.notable_action = CorrelationSearch._get_notable_action(self.saved_search.content) # type: ignore + + def refresh(self) -> None: + """Refreshes the metadata in the SavedSearch entity, and re-parses the fields we care about + + After operations we expect to alter the state of the SavedSearch, we call refresh so that we have a local + representation of the new state; then we extrat what we care about into this instance + """ + self.logger.debug( + f"Refreshing SavedSearch metadata for {self.name}...") + try: + self.saved_search.refresh() # type: ignore + except HTTPError as e: + raise ServerError(f"HTTP error encountered during refresh: {e}") + self._parse_metadata() + + def enable(self, refresh: bool = True) -> None: + """Enables the SavedSearch + + Enable the SavedSearch entity, optionally calling self.refresh() (optional, because in some situations the + caller may want to handle calling refresh, to avoid repeated network operations). + :param refresh: a bool indicating whether to run refresh after enabling + """ + self.logger.debug(f"Enabling {self.name}...") + try: + self.saved_search.enable() # type: ignore + except HTTPError as e: + raise ServerError(f"HTTP error encountered while enabling detection: {e}") + if refresh: + self.refresh() + + def disable(self, refresh: bool = True) -> None: + """Disables the SavedSearch + + Disable the SavedSearch entity, optionally calling self.refresh() (optional, because in some situations the + caller may want to handle calling refresh, to avoid repeated network operations). + :param refresh: a bool indicating whether to run refresh after disabling + """ + self.logger.debug(f"Disabling {self.name}...") + try: + self.saved_search.disable() # type: ignore + except HTTPError as e: + raise ServerError(f"HTTP error encountered while disabling detection: {e}") + if refresh: + self.refresh() + + @ property + def has_risk_analysis_action(self) -> bool: + """Whether the correlation search has an associated risk analysis Adaptive Response Action + :return: a boolean indicating whether it has a risk analysis Adaptive Response Action + """ + return self.risk_analysis_action is not None + + @property + def has_notable_action(self) -> bool: + """Whether the correlation search has an associated notable Adaptive Response Action + :return: a boolean indicating whether it has a notable Adaptive Response Action + """ + return self.notable_action is not None + + # TODO: evaluate sane defaults here (e.g. 3y is good now, but maybe not always...); NOTE also that + # contentctl may already be munging timestamps for us + def update_timeframe( + self, + earliest_time: str = ScheduleConfig.EARLIEST_TIME.value, + latest_time: str = ScheduleConfig.LATEST_TIME.value, + cron_schedule: str = ScheduleConfig.CRON_SCHEDULE.value, + refresh: bool = True + ) -> None: + """Updates the correlation search timeframe to work with test data + + Updates the correlation search timeframe such that it runs according to the given cron schedule, and that the + data it runs on is no older than the given earliest time and no newer than the given latest time; optionally + calls self.refresh() (optional, because in some situations the caller may want to handle calling refresh, to + avoid repeated network operations). 
+        :param earliest_time: the max age of data for the search to run on (default: "-3y@y")
+        :param latest_time: the latest time of data for the search to run on (default: "-1m@m")
+        :param cron_schedule: the cron schedule for the search to run on (default: "*/1 * * * *")
+        :param refresh: a bool indicating whether to run refresh after enabling
+        """
+        print(f"***** HANDLERS 2 *** {self.logger.handlers}")
+        # update the SavedSearch accordingly
+        data = {
+            SavedSearchKeys.EARLIEST_TIME_KEY.value: earliest_time,
+            SavedSearchKeys.LATEST_TIME_KEY.value: latest_time,
+            SavedSearchKeys.CRON_SCHEDULE_KEY.value: cron_schedule
+        }
+        self.logger.info(data)
+        self.logger.info(f"Updating timeframe for '{self.name}': {data}")
+        try:
+            self.saved_search.update(**data)  # type: ignore
+        except HTTPError as e:
+            raise ServerError(f"HTTP error encountered while updating timeframe: {e}")
+
+        if refresh:
+            self.refresh()
+
+    def force_run(self, refresh=True) -> None:
+        """Forces a detection run
+
+        Enables the detection, adjusts the cron schedule to run every 1 minute, and widens the earliest/latest window
+        to run on test data.
+        :param refresh: a bool indicating whether to refresh the metadata for the detection (default True)
+        """
+        self.update_timeframe(refresh=False)
+        if not self.enabled:
+            self.enable(refresh=False)
+        else:
+            self.logger.warn(f"Detection '{self.name}' was already enabled")
+
+        if refresh:
+            self.refresh()
+
+    def risk_event_exists(self) -> bool:
+        """Whether a risk event exists
+
+        Queries the `risk` index and returns True if a risk event exists
+        :return: a bool indicating whether a risk event exists in the risk index
+        """
+        # TODO: make this a more specific query based on the search in question (and update the docstring to reflect
+        # when you do)
+        # construct our query and issue our search job on the risk index
+        query = "search index=risk | head 1"
+        result_iterator = self._search(query)
+        try:
+            for result in result_iterator:
+                # we return True if we find at least one risk object
+                # TODO: re-evaluate this condition; we may want to look for multiple risk objects on different
+                # entities (e.g. users vs systems) and we may want to do more confirmatory testing
+                if result["index"] == Indexes.RISK_INDEX.value:
+                    self.logger.debug(
+                        f"Found risk event for '{self.name}': {result}")
+                    return True
+        except ServerError as e:
+            self.logger.error(f"Error returned from Splunk instance: {e}")
+            raise e
+        self.logger.debug(f"No risk event found for '{self.name}'")
+        return False
+
+    def notable_event_exists(self) -> bool:
+        """Whether a notable event exists
+
+        Queries the `notable` index and returns True if a notable event exists
+        :return: a bool indicating whether a notable event exists in the notable index
+        """
+        # TODO: make this a more specific query based on the search in question (and update the docstring to reflect
+        # when you do)
+        # construct our query and issue our search job on the notable index
+        query = "search index=notable | head 1"
+        result_iterator = self._search(query)
+        try:
+            for result in result_iterator:
+                # we return True if we find at least one notable object
+                if result["index"] == Indexes.NOTABLE_INDEX.value:
+                    self.logger.debug(
+                        f"Found notable event for '{self.name}': {result}")
+                    return True
+        except ServerError as e:
+            self.logger.error(f"Error returned from Splunk instance: {e}")
+            raise e
+        self.logger.debug(f"No notable event found for '{self.name}'")
+        return False
+
+    def risk_message_is_valid(self) -> bool:
+        """Validates the observed risk message against the expected risk message"""
+        # TODO
+        raise NotImplementedError
+
+    # NOTE: it would be more ideal to switch this to a system which gets the handle of the saved search job and polls
+    # it for completion, but that seems more tricky
+    def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: bool = False) -> IntegrationTestResult:
+        """Execute the integration test
+
+        Executes an integration test for this CorrelationSearch. First, ensures no matching risk/notables already exist
+        and clears the indexes if so. Then, we force a run of the detection, wait up to `max_sleep` seconds, and finally
+        we validate that the appropriate risk/notable events seem to have been created. NOTE: assumes the attack data
+        already exists on the instance
+        :param max_sleep: max number of seconds to sleep for after enabling the detection before we check for created
+            events; re-checks are made upon failures using an exponential backoff until the max is reached
+        :param raise_on_exc: bool flag indicating if an exception should be raised when caught by the test routine, or
+            if the error state should just be recorded for the test
+        """
+        # max_sleep must be greater than the base value we must wait for the scheduled search job to run (jobs run every
+        # 60s)
+        if max_sleep < TimeoutConfig.BASE_SLEEP.value:
+            raise ClientError(
+                f"max_sleep value of {max_sleep} is less than the base sleep required "
+                f"({TimeoutConfig.BASE_SLEEP.value})"
+            )
+
+        # initialize result as None
+        result: Optional[IntegrationTestResult] = None
+
+        # keep track of time slept and number of attempts for exponential backoff (base 2)
+        elapsed_sleep_time = 0
+        num_tries = 0
+
+        # set the initial base sleep time
+        time_to_sleep = TimeoutConfig.BASE_SLEEP.value
+
+        try:
+            # first make sure the indexes are currently empty and the detection is starting from a disabled state
+            self.logger.debug(
+                "Cleaning up any pre-existing risk/notable events..."
+ ) + self.update_pbar(TestingStates.PRE_CLEANUP) + if self.risk_event_exists(): + self.logger.warn( + f"Risk events matching '{self.name}' already exist; marking for deletion") + self.indexes_to_purge.add(Indexes.RISK_INDEX.value) + if self.notable_event_exists(): + self.logger.warn( + f"Notable events matching '{self.name}' already exist; marking for deletion") + self.indexes_to_purge.add(Indexes.NOTABLE_INDEX.value) + self.cleanup() + + # skip test if no risk or notable action defined + if not self.has_risk_analysis_action and not self.has_notable_action: + message = ( + f"No risk analysis or notable Adaptive Response actions defined for '{self.name}'; skipping " + "integration test" + ) + result = IntegrationTestResult(message=message, status=TestResultStatus.SKIP, wait_duration=0) + else: + # force the detection to run + self.logger.info(f"Forcing a run on {self.name}") + self.update_pbar(TestingStates.FORCE_RUN) + self.force_run() + time.sleep(TimeoutConfig.BASE_SLEEP.value) + + # loop so long as the elapsed time is less than max_sleep + while elapsed_sleep_time < max_sleep: + # sleep so the detection job can finish + self.logger.info(f"Waiting {time_to_sleep} for {self.name} so it can finish") + self.update_pbar(TestingStates.VALIDATING) + time.sleep(time_to_sleep) + elapsed_sleep_time += time_to_sleep + + self.logger.info( + f"Validating detection (attempt #{num_tries + 1} - {elapsed_sleep_time} seconds elapsed of " + f"{max_sleep} max)" + ) + + # reset the result to None on each loop iteration + result = None + + # check for risk events + self.logger.debug("Checking for matching risk events") + if self.has_risk_analysis_action: + if not self.risk_event_exists(): + result = IntegrationTestResult( + status=TestResultStatus.FAIL, + message=f"No matching risk event created for '{self.name}'", + wait_duration=elapsed_sleep_time, + ) + else: + self.indexes_to_purge.add(Indexes.RISK_INDEX.value) + + # check for notable events + self.logger.debug("Checking for matching notable events") + if self.has_notable_action: + if not self.notable_event_exists(): + # NOTE: because we check this last, if both fail, the error message about notables will + # always be the last to be added and thus the one surfaced to the user; good case for + # adding more descriptive test results + result = IntegrationTestResult( + status=TestResultStatus.FAIL, + message=f"No matching notable event created for '{self.name}'", + wait_duration=elapsed_sleep_time, + ) + else: + self.indexes_to_purge.add(Indexes.NOTABLE_INDEX.value) + + # if result is still None, then all checks passed and we can break the loop + if result is None: + result = IntegrationTestResult( + status=TestResultStatus.PASS, + message=f"Expected risk and/or notable events were created for '{self.name}'", + wait_duration=elapsed_sleep_time, + ) + break + + # increment number of attempts to validate detection + num_tries += 1 + + # compute the next time to sleep for + time_to_sleep = 2**num_tries + + # if the computed time to sleep will exceed max_sleep, adjust appropriately + if (elapsed_sleep_time + time_to_sleep) > max_sleep: + time_to_sleep = max_sleep - elapsed_sleep_time + + # cleanup the created events, disable the detection and return the result + self.logger.debug("Cleaning up any created risk/notable events...") + self.update_pbar(TestingStates.POST_CLEANUP) + self.cleanup() + except IntegrationTestingError as e: + if not raise_on_exc: + result = IntegrationTestResult( + status=TestResultStatus.ERROR, + message=f"Exception raised during 
integration test: {e}", + wait_duration=elapsed_sleep_time, + exception=e, + ) + self.logger.exception(f"{result.status.name}: {result.message}") # type: ignore + else: + raise e + + # log based on result status + if result is not None: + if result.status == TestResultStatus.PASS or result.status == TestResultStatus.SKIP: + self.logger.info(f"{result.status.name}: {result.message}") + elif result.status == TestResultStatus.FAIL: + self.logger.error(f"{result.status.name}: {result.message}") + elif result.status != TestResultStatus.ERROR: + message = f"Unexpected result status code: {result.status}" + self.logger.error(message) + raise ClientError(message) + else: + message = "Result was not generated; something went wrong..." + self.logger.error(message) + raise ClientError(message) + + return result + + def _search(self, query: str) -> ResultIterator: + """Execute a search job against the Splunk instance + + Given a query, creates a search job on the Splunk instance. Jobs are created in blocking mode and won't return + until results ready. + :param query: the SPL string to run + """ + self.logger.debug(f"Executing query: `{query}`") + job = self.service.search(query, exec_mode="blocking") + + # query the results, catching any HTTP status code errors + try: + response_reader: ResponseReader = job.results(output_mode="json") + except HTTPError as e: + # e.g. -> HTTP 400 Bad Request -- b'{"messages":[{"type":"FATAL","text":"Error in \'delete\' command: You + # have insufficient privileges to delete events."}]}' + message = f"Error querying Splunk instance: {e}" + self.logger.error(message) + raise ServerError(message) + + return ResultIterator(response_reader) + + def _delete_index(self, index: str) -> None: + """Deletes events in a given index + + Given an index, purge all events from it + :param index: index to delete all events from (e.g. 'risk') + """ + # construct our query and issue our delete job on the index + self.logger.debug(f"Deleting index '{index}") + query = f"search index={index} | delete" + result_iterator = self._search(query) + + # we should get two results, one for "__ALL__" and one for the index; iterate until we find the one for the + # given index + found_index = False + for result in result_iterator: + if result["index"] == index: + found_index = True + self.logger.info( + f"Deleted {result['deleted']} from index {result['index']} with {result['errors']} errors" + ) + + # check for errors + if result["errors"] != "0": + message = f"Errors encountered during delete operation on index {self.name}" + raise ServerError(message) + + # raise an error if we never encountered a result showing a delete operation in the given index + if not found_index: + message = f"No result returned showing deletion in index {index}" + raise ServerError(message) + + # TODO: refactor to allow for cleanup externally of the testing index; also ensure that ES is configured to use the + # default index set by contenctl + def cleanup(self, delete_test_index=False) -> None: + """Cleans up after an integration test + + First, disable the detection; then dump the risk, notable, and (optionally) test indexes. The test index is + optional because the contentctl capability we will be piggybacking on has it's own cleanup routine. 
+ NOTE: This does not restore the detection/search to it's original state; changes made to earliest/latest time + and the cron schedule persist after cleanup + :param delete_test_index: flag indicating whether the test index should be cleared or not (defaults to False) + """ + # delete_test_index can't be true when test_index is None + if delete_test_index and (self.test_index is None): + raise ClientError("test_index is None, cannot delete it") + + # disable the detection + self.disable() + + # delete the indexes + if delete_test_index: + self.indexes_to_purge.add(self.test_index) # type: ignore + for index in self.indexes_to_purge: + self._delete_index(index) + self.indexes_to_purge.clear() + + def update_pbar( + self, + state: str, + ) -> str: + """ + Instance specific function to log integrtation testing information via pbar + :param state: the state/message of the test to be logged + :returns: a formatted string for use w/ pbar + """ + # invoke the helper method on our instance attrs and return + return format_pbar_string( + self.pbar_data.pbar, + TestReportingType.INTEGRATION, + self.pbar_data.fq_test_name, + state, + self.pbar_data.start_time, + True + ) diff --git a/contentctl/objects/integration_test_result.py b/contentctl/objects/integration_test_result.py index f66a4740..8d2d8284 100644 --- a/contentctl/objects/integration_test_result.py +++ b/contentctl/objects/integration_test_result.py @@ -1,5 +1,8 @@ +from typing import Optional + from contentctl.objects.base_test_result import BaseTestResult class IntegrationTestResult(BaseTestResult): - pass + # the total time we slept waiting for the detection to fire after activating it + wait_duration: Optional[int] = None diff --git a/contentctl/objects/notable_action.py b/contentctl/objects/notable_action.py new file mode 100644 index 00000000..7492a3ec --- /dev/null +++ b/contentctl/objects/notable_action.py @@ -0,0 +1,36 @@ +from typing import Any + +from pydantic import BaseModel + + +class NotableAction(BaseModel): + """Representation of a notable action + + NOTE: this is NOT a representation of notable generated as a result of detection firing, but rather of the Adaptive + Response Action that generates the notable + :param rule_name: the name of the rule (detection/search) associated with the notable action (e.g. "ESCU - + Windows Modify Registry EnableLinkedConnections - Rule") + :param rule_description: a description of the rule (detection/search) associated with the notable action + :param security_domain: the domain associated with the notable action and related rule (detection/search) + :param severity: severity (e.g. 
"high") associated with the notable action and related rule (detection/search) + """ + rule_name: str + rule_description: str + security_domain: str + severity: str + + @classmethod + def parse_from_dict(cls, dict_: dict[str, Any]) -> "NotableAction": + """ + Given a dictionary of values, parses out specific keys to construct the instance + + :param dict_: a dictionary of with the expected keys + :return: and instance of NotableAction + :raises KeyError: if the dictionary given does not contain a required key + """ + return cls( + rule_name=dict_["action.notable.param.rule_title"], + rule_description=dict_["action.notable.param.rule_description"], + security_domain=dict_["action.notable.param.security_domain"], + severity=dict_["action.notable.param.severity"] + ) diff --git a/contentctl/objects/risk_analysis_action.py b/contentctl/objects/risk_analysis_action.py new file mode 100644 index 00000000..cdbb1674 --- /dev/null +++ b/contentctl/objects/risk_analysis_action.py @@ -0,0 +1,70 @@ +from typing import Any +import json + +from pydantic import BaseModel + +from contentctl.objects.risk_object import RiskObject +from contentctl.objects.threat_object import ThreatObject + + +class RiskAnalysisAction(BaseModel): + """Representation of a risk analysis action + + NOTE: this is NOT a representation of risk event generated as a result of detection firing, but rather of the + Adaptive Response Action that generates the risk event + :param risk_objects: a list of RiskObject instances for this action + :param message: the message associated w/ the risk event (NOTE: may contain macros of the form $...$ which + should be replaced with real values in the resulting risk events) + """ + risk_objects: list[RiskObject] + message: str + + @classmethod + def parse_from_dict(cls, dict_: dict[str, Any]) -> "RiskAnalysisAction": + """ + Given a dictionary of values, parses out specific keys to construct one or more instances. + + The risk action has 1 or more associates risk objects (e.g. user, system, etc.). These are stored as a list of + dicts inside the 'action.risk.param._risk' field, which we need to unpack the JSON of. + :param dict_: a dictionary of with the expected keys + :return: and instance of RiskAnalysisAction + :raises KeyError: if the dictionary given does not contain a required key + :raises json.JSONDecodeError: if the value at dict_['action.risk.param._risk'] can't be decoded from JSON into + a dict + :raises ValueError: if the value at dict_['action.risk.param._risk'] is unpacked to be anything + other than a singleton or if an unexpected field is enoucountered within the singleton + :return: a RiskAnalysisAction + """ + object_dicts = json.loads(dict_["action.risk.param._risk"]) + + if len(object_dicts) < 1: + raise ValueError( + f"Risk analysis action has no objects (threat nor risk) defined in 'action.risk.param._risk': " + f"{dict_['action.risk.param._risk']}" + ) + + risk_objects: list[RiskObject] = [] + threat_objects: list[ThreatObject] = [] + + # TODO: should we raise an error if we have only threat objects and no risk objects? 
Scoring is only a notion + # for risk objects as far as I'm aware + for entry in object_dicts: + if "risk_object_field" in entry: + risk_objects.append(RiskObject( + field=entry["risk_object_field"], + type=entry["risk_object_type"], + score=int(entry["risk_score"]) + )) + elif "threat_object_field" in entry: + threat_objects.append(ThreatObject( + field=entry["threat_object_field"], + type=entry["threat_object_type"] + )) + else: + raise ValueError( + f"Unexpected object within 'action.risk.param._risk': {entry}" + ) + return cls( + risk_objects=risk_objects, + message=dict_["action.risk.param._risk_message"] + ) diff --git a/contentctl/objects/risk_object.py b/contentctl/objects/risk_object.py new file mode 100644 index 00000000..61955c93 --- /dev/null +++ b/contentctl/objects/risk_object.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel + + +class RiskObject(BaseModel): + """Representation of a risk object associated with a risk analysis action + + Detections define observables (e.g. a user) with a role of "victim" with which the risk analysis and scoring gets + associated. On the Enterprise Security side, these translate to risk objects. Each risk object has a field from + which it draws its name and a type (e.g. user, system, etc.). A risk event will be generated for each risk + object accordingly. NOTE: obervables w/out an explicit role MAY default to being a risk object; it is not clear + at the time of writing + :param field: the name of the field from which the risk object will get it's name + :param type_: the type of the risk object (e.g. "system") + :param score: the risk score associated with the obersevable (e.g. 64) + """ + field: str + type: str + score: int diff --git a/contentctl/objects/test_config.py b/contentctl/objects/test_config.py index 59896a30..d2a2d932 100644 --- a/contentctl/objects/test_config.py +++ b/contentctl/objects/test_config.py @@ -491,6 +491,7 @@ class TestConfig(BaseModel, extra=Extra.forbid, validate_assignment=True): title="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk" " instance with ES installed)" ) + # TODO: add setting to skip listing skips diff --git a/contentctl/objects/threat_object.py b/contentctl/objects/threat_object.py new file mode 100644 index 00000000..020c4b7a --- /dev/null +++ b/contentctl/objects/threat_object.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel + + +class ThreatObject(BaseModel): + """Representation of a threat object associated with a risk analysis action + + Detections define observables with a role of "attacker" with which the risk analysis gets associated. On the + Enterprise Security side, these translate to threat objects. Each threat object has a field from which it draws its + name and a type. NOTE: it is unclear whether a threat object will have a corresponding risk event, or perhaps a + corresponding threat event? + :param field: the name of the field from which the risk object will get it's name + :param type_: the type of the risk object (e.g. 
"system") + """ + field: str + type: str diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py index 814eeb85..2ec110de 100644 --- a/contentctl/objects/unit_test_result.py +++ b/contentctl/objects/unit_test_result.py @@ -13,7 +13,6 @@ class UnitTestResult(BaseTestResult): missing_observables: list[str] = [] - sid_link: Union[None, str] = None def set_job_content( self, From 5decf76580a75662d92515c2307f59139a92cd91 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 30 Nov 2023 09:49:02 -0800 Subject: [PATCH 10/22] docsting, comment, print cleanup --- .../DetectionTestingInfrastructure.py | 79 ++++++-- contentctl/input/detection_builder.py | 2 +- contentctl/objects/correlation_search.py | 172 ++++++++++-------- 3 files changed, 154 insertions(+), 99 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index c3efe29d..6e8bfd8c 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -12,7 +12,7 @@ from sys import stdout from dataclasses import dataclass from shutil import copyfile -from typing import Union +from typing import Union, Optional from pydantic import BaseModel, PrivateAttr, Field import requests @@ -64,8 +64,12 @@ # completed/elapsed counter) # TODO: investigate failures (true, transient, latency?) # TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long -# cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be +# cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be # testing on Correlation type detections anyway +# TODO: make it clear to Eric that test durations will no longer account for +# replay/cleanup in this new test grouping model (in the CLI reporting; the recorded +# test duration in the result file will include the setup/cleanup time in the duration +# for both unit and integration tests (effectively double counted)) class SetupTestGroupResults(BaseModel): @@ -430,7 +434,15 @@ def execute(self): self.sync_obj.outputQueue.append(detection) self.sync_obj.currentTestingQueue[self.get_name()] = None - def test_detection(self, detection: Detection): + def test_detection(self, detection: Detection) -> None: + """ + Tests a single detection; iterates over the TestGroups for the detection (one TestGroup per + unit test, where a TestGroup is a unit test and integration test relying on the same attack + data) + :param detection: the Detection to test + """ + # TODO: do we want to return a failure here if no test exists for a production detection? 
+ # Log and return if no tests exist if detection.tests is None: self.pbar.write(f"No test(s) found for {detection.name}") return @@ -453,20 +465,15 @@ def test_detection(self, detection: Detection): # replay attack_data setup_results = self.setup_test_group(test_group) - # TODO: make it clear to Eric that test durations will no longer account for - # replay/cleanup in this new test grouping model (in the CLI reporting; the recorded - # test duration in the result file will include the setup/cleanup time in the duration - # for both unit and integration tests (effectively double counted)) - # run unit test self.execute_unit_test(detection, test_group.unit_test, setup_results) - # TODO: if unit test fails, SKIP or FAIL the integration test # run integration test self.execute_integration_test( detection, test_group.integration_test, - setup_results + setup_results, + test_group.unit_test.result ) # cleanup @@ -510,6 +517,8 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: # Capture the setup start time setup_start_time = time.time() self.pbar.write(f"setup_start_time: {setup_start_time}") + + # Log the start of the test group self.pbar.reset() self.format_pbar_string( TestReportingType.GROUP, @@ -521,23 +530,35 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: # Use NullBar if there is more than 1 container or we are running # in a non-interactive context + # Initialize the setup results results = SetupTestGroupResults(start_time=setup_start_time) + # Replay attack data try: self.replay_attack_data_files(test_group, setup_start_time) except Exception as e: results.exception = e results.success = False + # Set setup duration results.duration = time.time() - setup_start_time + return results def cleanup_test_group( - self, - test_group: TestGroup, - test_group_start_time: float + self, + test_group: TestGroup, + test_group_start_time: float, ) -> CleanupTestGroupResults: + """ + Deletes attack data for the test group and returns metadata about the cleanup duration + :param test_group: the TestGroup being cleaned up + :param test_group_start_time: the start time of the TestGroup (for logging) + """ + # Get the start time for cleanup cleanup_start_time = time.time() + + # Log the cleanup action self.format_pbar_string( TestReportingType.GROUP, test_group.name, @@ -546,8 +567,10 @@ def cleanup_test_group( ) # TODO: do we want to clean up even if replay failed? Could have been partial failure? 
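# Illustrative sketch (editor's addition, not part of this patch): the bookkeeping pattern
# that setup_test_group() above and cleanup_test_group() here share -- capture a start time,
# run the work, record any exception rather than raising it, and stamp the duration.
# SetupTestGroupResults is the model defined at the top of this module; the helper name
# `timed_setup` and the `replay` callable are assumptions for illustration only.
import time


def timed_setup(replay, setup_start_time: float) -> SetupTestGroupResults:
    results = SetupTestGroupResults(start_time=setup_start_time)
    try:
        # e.g. self.replay_attack_data_files(test_group, setup_start_time)
        replay()
    except Exception as e:
        # failures are recorded on the result object; the caller decides how to report them
        results.exception = e
        results.success = False
    results.duration = time.time() - setup_start_time
    return results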
+ # Delete attack data self.delete_attack_data(test_group.attack_data) + # Return the cleanup metadata, adding start time and duration return CleanupTestGroupResults( duration=time.time() - cleanup_start_time, start_time=cleanup_start_time @@ -699,16 +722,16 @@ def execute_unit_test( and (test.result is None or test.result.success is False) ) ) and not self.sync_obj.terminate: + # Assume failure + res = "FAIL" + # Determine the state to report to the user if test.result is None: res = "ERROR" link = detection.search else: - res = test.result.success - if res: + if test.result.success: res = "PASS" - else: - res = "FAIL" link = test.result.get_summary_dict()["sid_link"] self.format_pbar_string( @@ -724,6 +747,7 @@ def execute_unit_test( except Exception: pass + # Report success if test.result is not None and test.result.success: self.pbar.write( self.format_pbar_string( @@ -734,8 +758,8 @@ def execute_unit_test( set_pbar=False, ) ) - else: + # Report failure self.pbar.write( self.format_pbar_string( TestReportingType.UNIT, @@ -745,7 +769,11 @@ def execute_unit_test( set_pbar=False, ) ) + + # Flush stdout for pbar stdout.flush() + + # Add duration if test.result is not None: test.result.duration = round(time.time() - test_start_time, 2) @@ -754,6 +782,7 @@ def execute_integration_test( detection: Detection, test: IntegrationTest, setup_results: SetupTestGroupResults, + unit_test_result: Optional[UnitTestResult] ): """ Executes an integration test on the detection @@ -762,7 +791,12 @@ def execute_integration_test( # Capture unit test start time test_start_time = time.time() - # First, check to see if this test has been skipped; log and return if so + # First, check to see if the unit test failed; skip integration testing if so + if (unit_test_result is not None) and (unit_test_result.status == TestResultStatus.FAIL): + test.skip("TEST SKIPPED: associated unit test failed") + + # Next, check to see if this test has been skipped (just now or elsewhere); log and return + # if so if test.result is not None and test.result.status == TestResultStatus.SKIP: # report the skip to the CLI self.pbar.write( @@ -814,17 +848,22 @@ def execute_integration_test( # Run the test try: + # Capture pbar data for CorrelationSearch logging pbar_data = PbarData( pbar=self.pbar, fq_test_name=f"{detection.name}:{test.name}", start_time=test_start_time ) + + # Instantiate the CorrelationSearch correlation_search = CorrelationSearch( detection_name=detection.name, service=self.get_conn(), pbar_data=pbar_data, ) + # TODO: add max_sleep as a configurable value on cli/config + # Run the test test.result = correlation_search.test() except Exception as e: # Catch and report and unhandled exceptions in integration testing @@ -848,7 +887,7 @@ def execute_integration_test( if test.result is None: res = "ERROR" else: - res = test.result.status.value + res = test.result.status.value.upper() self.format_pbar_string( TestReportingType.INTEGRATION, diff --git a/contentctl/input/detection_builder.py b/contentctl/input/detection_builder.py index 7ad14169..e54f64b2 100644 --- a/contentctl/input/detection_builder.py +++ b/contentctl/input/detection_builder.py @@ -289,7 +289,7 @@ def skipIntegrationTests(self) -> None: if self.security_content_obj is not None and isinstance(self.security_content_obj, Detection): for test in self.security_content_obj.tests: if isinstance(test, IntegrationTest): - test.skip("Skipping all integration tests") + test.skip("TEST SKIPPED: Skipping all integration tests") else: raise ValueError( 
"security_content_obj must be an instance of Detection to skip integration tests, " diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 3741770b..895c9028 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -35,15 +35,14 @@ def get_logger() -> logging.Logger: """ # get logger for module logger = logging.getLogger(__name__) - logger.propagate = False - print(f"***** Got logger *** {logger}: {__name__}") + # set propagate to False if not already set as such (needed to that we do not flow up to any + # root loggers) + if logger.propagate: + logger.propagate = False # if logger has no handlers, it needs to be configured for the first time - print(f"***** Has handlers *** {logger.hasHandlers()}") - print(f"***** Has handlers 2 *** {logger.handlers}") if not logger.hasHandlers(): - print(f"***** No handlers *** {logger.handlers}") # set level logger.setLevel(LOG_LEVEL) @@ -53,18 +52,15 @@ def get_logger() -> logging.Logger: handler = logging.FileHandler(LOG_PATH) else: handler = logging.NullHandler() - print(f"***** Made handlers *** {handler}") # Format our output formatter = logging.Formatter('%(asctime)s - %(levelname)s:%(name)s - %(message)s') handler.setFormatter(formatter) - print(f"***** Made formatter *** {formatter}") # Set handler level and add to logger handler.setLevel(LOG_LEVEL) logger.addHandler(handler) - print(f"***** Added handler *** {logger.handlers}") - print(f"***** HANDLERS *** {logger.handlers}") + return logger @@ -135,8 +131,10 @@ class ResultIterator: :param logger: a Logger object """ def __init__(self, response_reader: ResponseReader) -> None: + # init the results reader self.results_reader: JSONResultsReader = JSONResultsReader( - response_reader) + response_reader + ) # get logger self.logger: logging.Logger = get_logger() @@ -183,12 +181,13 @@ class PbarData(BaseModel): start_time: float class Config: + # needed to support the tqdm type arbitrary_types_allowed = True -# TODO: right now, we are creating one CorrelationSearch instance for each test case; in general, there is only one +# TODO: right now, we are creating one CorrelationSearch instance for each test case; typically, there is only one # unit test, and thus one integration test, per detection, so this is not an issue. However, if we start having many -# test cases per detection, we will be duplicating some effort & network calls that we don't need to. Condier +# test cases per detection, we will be duplicating some effort & network calls that we don't need to. Consider # refactoring in order to re-use CorrelationSearch objects across tests in such a case class CorrelationSearch(BaseModel): """Representation of a correlation search in Splunk @@ -197,36 +196,56 @@ class CorrelationSearch(BaseModel): detection rule for our purposes. :param detection_name: the name of the search/detection (e.g. "Windows Modify Registry EnableLinkedConnections") :param service: a Service instance representing a connection to a Splunk instance - :param test_index: the index attack data is forwarded to for testing (optionally used in cleanup) :param pbar_data: the encapsulated info needed for logging w/ pbar + :param test_index: the index attack data is forwarded to for testing (optionally used in cleanup) """ - # our instance fields + ## The following three fields are explicitly needed at instantiation # noqa: E266 + + # the name of the search/detection (e.g. 
"Windows Modify Registry EnableLinkedConnections") detection_name: str + # a Service instance representing a connection to a Splunk instance service: splunklib.Service + + # the encapsulated info needed for logging w/ pbar pbar_data: PbarData - # TODO: replace this w/ pbar stuff + ## The following field is optional for instantiation # noqa: E266 + + # The index attack data is sent to; can be None if we are relying on the caller to do our + # cleanup of this index test_index: Optional[str] = Field(default=None, min_length=1) + ## All remaining fields can be derived from other fields or have intentional defaults that # noqa: E266 + ## should not be changed (validators should prevent instantiating some of these fields directly # noqa: E266 + ## to prevent undefined behavior) # noqa: E266 + + # The logger to use (logs all go to a null pipe unless ENABLE_LOGGING is set to True, so as not + # to conflict w/ tqdm) logger: logging.Logger = Field(default_factory=get_logger, const=True) + # The search name (e.g. "ESCU - Windows Modify Registry EnableLinkedConnections - Rule") name: Optional[str] = None + + # The path to the saved search on the Splunk instance splunk_path: Optional[str] = None + + # A model of the saved search as provided by splunklib saved_search: Optional[splunklib.SavedSearch] = None + + # The set of indexes to clear on cleanup indexes_to_purge: set[str] = set() - # earliest_time: Optional[str] = None - # latest_time: Optional[str] = None - # cron_schedule: Optional[str] = None + # The risk analysis adaptive response action (if defined) risk_analysis_action: Union[RiskAnalysisAction, None] = None + + # The notable adaptive response action (if defined) notable_action: Union[NotableAction, None] = None class Config: + # needed to allow fields w/ types like SavedSearch arbitrary_types_allowed = True - # enabled: bool = False - @validator("name", always=True) @classmethod def _convert_detection_to_search_name(cls, v, values) -> str: @@ -235,7 +254,7 @@ def _convert_detection_to_search_name(cls, v, values) -> str: """ if "detection_name" not in values: raise ValueError("detection_name missing; name is dependent on detection_name") - + expected_name = f"ESCU - {values['detection_name']} - Rule" if v is not None and v != expected_name: raise ValueError( @@ -251,7 +270,7 @@ def _derive_splunk_path(cls, v, values) -> str: """ if "name" not in values: raise ValueError("name missing; splunk_path is dependent on name") - + expected_path = f"saved/searches/{values['name']}" if v is not None and v != expected_path: raise ValueError( @@ -267,7 +286,7 @@ def _instantiate_saved_search(cls, v, values) -> str: """ if "splunk_path" not in values or "service" not in values: raise ValueError("splunk_path or service missing; saved_search is dependent on both") - + if v is not None: raise ValueError( "saved_search must be derived from the service and splunk_path; leave as None and it will be derived " @@ -278,18 +297,16 @@ def _instantiate_saved_search(cls, v, values) -> str: values['splunk_path'], ) - # @validator("risk_analysis_action", "notable_action") - # @classmethod - # def _initialized_to_none(cls, v, values) -> None: - # """ - # Ensure a field was initialized as None - # """ - # if v is not None: - # raise ValueError("field must be initialized to None; will be derived automatically") - + # TODO: ideally, we could handle this and the following init w/ a call to model_post_init, so + # that all the logic is encapsulated w/in _parse_risk_and_notable_actions but that is a + # pydantic v2 feature: + 
# https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init @validator("risk_analysis_action", always=True) @classmethod def _init_risk_analysis_action(cls, v, values) -> Optional[RiskAnalysisAction]: + """ + Initialize risk_analysis_action + """ if "saved_search" not in values: raise ValueError("saved_search missing; risk_analysis_action is dependent on saved_search") @@ -303,9 +320,12 @@ def _init_risk_analysis_action(cls, v, values) -> Optional[RiskAnalysisAction]: @validator("notable_action", always=True) @classmethod def _init_notable_action(cls, v, values) -> Optional[NotableAction]: + """ + Initialize notable_action + """ if "saved_search" not in values: raise ValueError("saved_search missing; notable_action is dependent on saved_search") - + if v is not None: raise ValueError( "notable_action must be derived from the saved_search; leave as None and it will be derived " @@ -313,17 +333,11 @@ def _init_notable_action(cls, v, values) -> Optional[NotableAction]: ) return CorrelationSearch._get_notable_action(values['saved_search'].content) - # def __init__(self, **kwargs) -> None: - # # call the parent constructor - # super().__init__(**kwargs) - - # # parse out the metadata we care about - # # TODO: ideally, we could handle this w/ a call to model_post_init, but that is a pydantic v2 feature - # # https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init - # self._parse_metadata() - @property def earliest_time(self) -> str: + """ + The earliest time configured for the saved search + """ if self.saved_search is not None: return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY.value] else: @@ -331,6 +345,9 @@ def earliest_time(self) -> str: @property def latest_time(self) -> str: + """ + The latest time configured for the saved search + """ if self.saved_search is not None: return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY.value] else: @@ -338,6 +355,9 @@ def latest_time(self) -> str: @property def cron_schedule(self) -> str: + """ + The cron schedule configured for the saved search + """ if self.saved_search is not None: return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY.value] else: @@ -345,6 +365,9 @@ def cron_schedule(self) -> str: @property def enabled(self) -> bool: + """ + Whether the saved search is enabled + """ if self.saved_search is not None: if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY.value]): return False @@ -353,20 +376,27 @@ def enabled(self) -> bool: else: raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") - # @property - # def risk_analysis_action(self) -> Optional[RiskAnalysisAction]: - # if self._risk_analysis_action is None: - # self._parse_risk_analysis_action() - # return self._risk_analysis_action + @ property + def has_risk_analysis_action(self) -> bool: + """Whether the correlation search has an associated risk analysis Adaptive Response Action + :return: a boolean indicating whether it has a risk analysis Adaptive Response Action + """ + return self.risk_analysis_action is not None - # @property - # def notable_action(self) -> Optional[NotableAction]: - # if self._notable_action is None: - # self._parse_notable_action() - # return self._notable_action + @property + def has_notable_action(self) -> bool: + """Whether the correlation search has an associated notable Adaptive Response Action + :return: a boolean indicating whether it has a notable Adaptive Response Action + """ + return 
self.notable_action is not None @staticmethod def _get_risk_analysis_action(content: dict[str, Any]) -> Optional[RiskAnalysisAction]: + """ + Given the saved search content, parse the risk analysis action + :param content: a dict of strings to values + :returns: a RiskAnalysisAction, or None if none exists + """ if int(content[SavedSearchKeys.RISK_ACTION_KEY.value]): try: return RiskAnalysisAction.parse_from_dict(content) @@ -376,13 +406,18 @@ def _get_risk_analysis_action(content: dict[str, Any]) -> Optional[RiskAnalysisA @staticmethod def _get_notable_action(content: dict[str, Any]) -> Optional[NotableAction]: + """ + Given the saved search content, parse the notable action + :param content: a dict of strings to values + :returns: a NotableAction, or None if none exists + """ # grab notable details if present if int(content[SavedSearchKeys.NOTABLE_ACTION_KEY.value]): return NotableAction.parse_from_dict(content) return None - def _parse_metadata(self) -> None: - """Parses the metadata we care about from self.saved_search.content + def _parse_risk_and_notable_actions(self) -> None: + """Parses the risk/notable metadata we care about from self.saved_search.content :raises KeyError: if self.saved_search.content does not contain a required key :raises json.JSONDecodeError: if the value at self.saved_search.content['action.risk.param._risk'] can't be @@ -410,7 +445,7 @@ def refresh(self) -> None: self.saved_search.refresh() # type: ignore except HTTPError as e: raise ServerError(f"HTTP error encountered during refresh: {e}") - self._parse_metadata() + self._parse_risk_and_notable_actions() def enable(self, refresh: bool = True) -> None: """Enables the SavedSearch @@ -442,22 +477,8 @@ def disable(self, refresh: bool = True) -> None: if refresh: self.refresh() - @ property - def has_risk_analysis_action(self) -> bool: - """Whether the correlation search has an associated risk analysis Adaptive Response Action - :return: a boolean indicating whether it has a risk analysis Adaptive Response Action - """ - return self.risk_analysis_action is not None - - @property - def has_notable_action(self) -> bool: - """Whether the correlation search has an associated notable Adaptive Response Action - :return: a boolean indicating whether it has a notable Adaptive Response Action - """ - return self.notable_action is not None - - # TODO: evaluate sane defaults here (e.g. 3y is good now, but maybe not always...); NOTE also that - # contentctl may already be munging timestamps for us + # TODO: evaluate sane defaults here (e.g. 
3y is good now, but maybe not always...); NOTE also + # that contentctl may already be munging timestamps for us def update_timeframe( self, earliest_time: str = ScheduleConfig.EARLIEST_TIME.value, @@ -476,7 +497,6 @@ def update_timeframe( :param cron_schedule: the cron schedule for the search to run on (default: "*/1 * * * *") :param refresh: a bool indicating whether to run refresh after enabling """ - print(f"***** HANDLERS 2 *** {self.logger.handlers}") # update the SavedSearch accordingly data = { SavedSearchKeys.EARLIEST_TIME_KEY.value: earliest_time, @@ -775,8 +795,7 @@ def _delete_index(self, index: str) -> None: message = f"No result returned showing deletion in index {index}" raise ServerError(message) - # TODO: refactor to allow for cleanup externally of the testing index; also ensure that ES is configured to use the - # default index set by contenctl + # TODO: ensure that ES is configured to use the default index set by contenctl def cleanup(self, delete_test_index=False) -> None: """Cleans up after an integration test @@ -800,10 +819,7 @@ def cleanup(self, delete_test_index=False) -> None: self._delete_index(index) self.indexes_to_purge.clear() - def update_pbar( - self, - state: str, - ) -> str: + def update_pbar(self, state: str) -> str: """ Instance specific function to log integrtation testing information via pbar :param state: the state/message of the test to be logged From cb16c342942b20c348c479b1c6f4759b9e5ff63a Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 30 Nov 2023 14:51:26 -0800 Subject: [PATCH 11/22] TODO consolidation; comments, docstrings, lint fixes --- .../DetectionTestingManager.py | 1 - .../DetectionTestingInfrastructure.py | 57 +++++++++++++------ .../actions/detection_testing/progress_bar.py | 4 +- .../views/DetectionTestingView.py | 1 - contentctl/contentctl.py | 2 - .../detection_abstract.py | 4 -- contentctl/objects/base_test.py | 3 + contentctl/objects/base_test_result.py | 23 +++++++- contentctl/objects/correlation_search.py | 41 ++++--------- contentctl/objects/integration_test.py | 1 - contentctl/objects/integration_test_result.py | 3 + contentctl/objects/test_config.py | 3 +- contentctl/objects/test_group.py | 6 +- 13 files changed, 86 insertions(+), 63 deletions(-) diff --git a/contentctl/actions/detection_testing/DetectionTestingManager.py b/contentctl/actions/detection_testing/DetectionTestingManager.py index 2b4f2c24..70f9f2e1 100644 --- a/contentctl/actions/detection_testing/DetectionTestingManager.py +++ b/contentctl/actions/detection_testing/DetectionTestingManager.py @@ -128,7 +128,6 @@ def sigint_handler(signum, frame): except Exception as e: self.output_dto.terminate = True print(f"Error running in container: {str(e)}") - raise e self.output_dto.terminate = True diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index c9016b6f..3a7e9c18 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -45,33 +45,57 @@ TestingStates ) - -# TODO: cleanup extra prints/logs, add docstrings/comments -# TODO: cleanup noqas/ignores -# TODO: test known failures/successes -# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly -# TODO: SKIP/FAIL integration test on unit test fail/error (and SKIP?) -# TODO: consolidate logic around success vs. 
status across unit and integration tests -# TODO: validate duration, wait_duration, runDuration -# TODO: consider renaming duration, wait_duration for clarity -# TODO: add flag for enabling logging for correlation_search logging (?) -# TODO: add flag for changing max_sleep time for integration tests -# TODO: update extractor to work w/ new fields in output +# DO THE FOLLOWING BEFORE REVIEW # TODO: look into getting better time elapsed updates on pbar statuses -# TODO: add stats around total test cases and unit/integration test sucess/failure? maybe configurable reporting? -# TODO: do a full package test -# TODO: look into how index/risk index clearing surfaces to contentctl as an error (if at all) # TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the # completed/elapsed counter) -# TODO: investigate failures (true, transient, latency?) +# TODO: test known failures/successes +# TODO: do a full package test +# TODO: validate duration, wait_duration, runDuration +# TODO: consolidate logic around success vs. status across unit and integration tests -> if we +# force the setting of a status, maybe make success a property instead of a field?; ensure that +# all invocations of test results have status set AND consider if SKIP should be success as True +# or False +# TODO: for Detection.all_tests_successful, if all of a detections tests are skipped, it will +# return True; is this desirable? We definitely don't want skips interspersed w/ passes to cause +# a failure overall + + +# TODO: look into how notable/risk index clearing surfaces to contentctl as an error (if at all) # TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long # cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be # testing on Correlation type detections anyway +# TODO: cleanup extra prints/logs +# TODO: cleanup noqas/ignores + +# LIST THE FOLLOWING AS PART OF THE REVIEW +# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly # TODO: make it clear to Eric that test durations will no longer account for # replay/cleanup in this new test grouping model (in the CLI reporting; the recorded # test duration in the result file will include the setup/cleanup time in the duration # for both unit and integration tests (effectively double counted)) +# LIST THE FOLLOWING AS (POSSIBLE) FUTURE WORK IN THE REVIEW +# TODO: update extractor to work w/ new fields in output +# TODO: add flag for enabling logging for correlation_search logging (?) +# TODO: add flag for changing max_sleep time for integration tests +# TODO: add stats around total test cases and unit/integration test sucess/failure? maybe configurable reporting? +# TODO: add setting to skip listing skips -> test_config.TestConfig, contentctl.test, contentctl.main +# TODO: right now, we are creating one CorrelationSearch instance for each test case; typically, there is only one +# unit test, and thus one integration test, per detection, so this is not an issue. However, if we start having many +# test cases per detection, we will be duplicating some effort & network calls that we don't need to. 
Consider +# refactoring in order to re-use CorrelationSearch objects across tests in such a case +# TODO: ideally, we could handle this and the following init w/ a call to model_post_init, so +# that all the logic is encapsulated w/in _parse_risk_and_notable_actions but that is a +# pydantic v2 feature (see the init validators for risk/notable actions): +# https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init +# TODO: evaluate sane defaults for timeframe for integration testing (e.g. 3y is good now, but +# maybe not always...); NOTE also that contentctl may already be munging timestamps for us +# TODO: make the search for risk/notable events a more specific query based on the search in +# question (and update the docstring to relfect when you do) + +# GENERAL CURIOSITY +# TODO: how often do we actually encounter tests w/ an earliest/latest time defined? class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None @@ -863,7 +887,6 @@ def execute_integration_test( pbar_data=pbar_data, ) - # TODO: add max_sleep as a configurable value on cli/config # Run the test test.result = correlation_search.test() except Exception as e: diff --git a/contentctl/actions/detection_testing/progress_bar.py b/contentctl/actions/detection_testing/progress_bar.py index df892a9c..a0fa48e5 100644 --- a/contentctl/actions/detection_testing/progress_bar.py +++ b/contentctl/actions/detection_testing/progress_bar.py @@ -53,8 +53,8 @@ class FinalTestingStates(str, Enum): # max length of a test name -# TODO: this max size is declared, and it is used appropriatel w/ .ljust, but nothing truncates test names to makes -# them the appropriate size +# TODO: this max size is declared, and it is used appropriately w/ .ljust, but nothing truncates +# test names to makes them the appropriate size MAX_TEST_NAME_LENGTH = 70 # The format string used for pbar reporting diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index b234910c..8c6f5ac0 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -11,7 +11,6 @@ from contentctl.objects.enums import DetectionStatus -# TODO: adjust the view to report from test_groups instead of tests class DetectionTestingView(BaseModel, abc.ABC): config: TestConfig sync_obj: DetectionTestingManagerOutputDto diff --git a/contentctl/contentctl.py b/contentctl/contentctl.py index 4decbd57..ecfe3feb 100644 --- a/contentctl/contentctl.py +++ b/contentctl/contentctl.py @@ -180,7 +180,6 @@ def test(args: argparse.Namespace): config.test.detections_list = args.detections_list if args.enable_integration_testing or config.test.enable_integration_testing: config.test.enable_integration_testing = True - # TODO: add setting to skip listing skips # validate and setup according to infrastructure type if config.test.infrastructure_config.infrastructure_type == DetectionTestingTargetInfrastructure.container: @@ -594,7 +593,6 @@ def main(): help="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk " "instance with ES installed)" ) - # TODO: add setting to skip listing skips test_parser.set_defaults(func=test) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 3e677fca..00935cd1 100644 --- 
a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -36,9 +36,6 @@ class Detection_Abstract(SecurityContentObject): check_references: bool = False references: list - # TODO: consider restructuring this as a set of associated tests; each test should be an individual case, but 2 - # tests may rely on the same data and thus it doesn't make sense to replay the data twice; similarly, it doesn't - # make sense to run an integration test if the unit test failed. Currently, IntegrationTest is a field in UnitTest tests: list[Union[UnitTest, IntegrationTest]] = [] # enrichments @@ -185,7 +182,6 @@ def datamodel_valid(cls, v, values): raise ValueError("not valid data model: " + values["name"]) return v - # TODO: if all of a detections tests are skipped, this will return True (is this desirable?) def all_tests_successful(self) -> bool: if len(self.tests) == 0: return False diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index 0df68fc7..7927089f 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -39,6 +39,9 @@ class BaseTest(BaseModel, ABC): @abstractmethod def skip(self) -> None: + """ + Skip a test + """ raise NotImplementedError( "BaseTest test is an abstract class; skip must be implemented by subclasses" ) diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index df0b7090..cb2c99e0 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -26,16 +26,34 @@ def __str__(self) -> str: class BaseTestResult(BaseModel): + """ + Base class for test results + """ + # Message for the result message: Union[None, str] = None + + # Any exception that was raised (may be None) exception: Union[Exception, None] = None + + # The status (PASS, FAIL, SKIP, ERROR) status: Union[TestResultStatus, None] = None + + # Whether the test passed (should only be True when status==PASS) success: bool = False + + # The duration of the test in seconds duration: float = 0 + + # The search job metadata job_content: Union[Record, None] = None + + # The Splunk endpoint URL sid_link: Union[None, str] = None class Config: validate_assignment = True + + # Needed to allow for embedding of Exceptions in the model arbitrary_types_allowed = True @validator("success", always=True) @@ -44,8 +62,9 @@ def derive_success_from_status(cls, v, values) -> bool: """ If a status is provided at initialization, we can derive success from it """ - print(f"*** VALUES ***: {values}") + # If a status is provided an initialization, derive success if ("status" in values) and (values["status"] is not None): + # Success is True only if status is PASS if values["status"] == TestResultStatus.PASS: return True else: @@ -54,8 +73,6 @@ def derive_success_from_status(cls, v, values) -> bool: return False return v - # TODO: ensure that all invocations of test results have status set AND consider if SKIP should be success as True - # or False @property def failed_and_complete(self) -> bool: """ diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 895c9028..2153aea9 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -4,10 +4,10 @@ from enum import Enum from pydantic import BaseModel, validator, Field -from splunklib.results import JSONResultsReader, Message # type: ignore -from splunklib.binding import HTTPError, 
ResponseReader # type: ignore -import splunklib.client as splunklib # type: ignore -from tqdm import tqdm # type: ignore +from splunklib.results import JSONResultsReader, Message # type: ignore +from splunklib.binding import HTTPError, ResponseReader # type: ignore +import splunklib.client as splunklib # type: ignore +from tqdm import tqdm # type: ignore from contentctl.objects.risk_analysis_action import RiskAnalysisAction from contentctl.objects.notable_action import NotableAction @@ -185,10 +185,6 @@ class Config: arbitrary_types_allowed = True -# TODO: right now, we are creating one CorrelationSearch instance for each test case; typically, there is only one -# unit test, and thus one integration test, per detection, so this is not an issue. However, if we start having many -# test cases per detection, we will be duplicating some effort & network calls that we don't need to. Consider -# refactoring in order to re-use CorrelationSearch objects across tests in such a case class CorrelationSearch(BaseModel): """Representation of a correlation search in Splunk @@ -297,10 +293,6 @@ def _instantiate_saved_search(cls, v, values) -> str: values['splunk_path'], ) - # TODO: ideally, we could handle this and the following init w/ a call to model_post_init, so - # that all the logic is encapsulated w/in _parse_risk_and_notable_actions but that is a - # pydantic v2 feature: - # https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init @validator("risk_analysis_action", always=True) @classmethod def _init_risk_analysis_action(cls, v, values) -> Optional[RiskAnalysisAction]: @@ -427,11 +419,11 @@ def _parse_risk_and_notable_actions(self) -> None: """ # grab risk details if present self.risk_analysis_action = CorrelationSearch._get_risk_analysis_action( - self.saved_search.content # type: ignore + self.saved_search.content # type: ignore ) # grab notable details if present - self.notable_action = CorrelationSearch._get_notable_action(self.saved_search.content) # type: ignore + self.notable_action = CorrelationSearch._get_notable_action(self.saved_search.content) # type: ignore def refresh(self) -> None: """Refreshes the metadata in the SavedSearch entity, and re-parses the fields we care about @@ -442,7 +434,7 @@ def refresh(self) -> None: self.logger.debug( f"Refreshing SavedSearch metadata for {self.name}...") try: - self.saved_search.refresh() # type: ignore + self.saved_search.refresh() # type: ignore except HTTPError as e: raise ServerError(f"HTTP error encountered during refresh: {e}") self._parse_risk_and_notable_actions() @@ -456,7 +448,7 @@ def enable(self, refresh: bool = True) -> None: """ self.logger.debug(f"Enabling {self.name}...") try: - self.saved_search.enable() # type: ignore + self.saved_search.enable() # type: ignore except HTTPError as e: raise ServerError(f"HTTP error encountered while enabling detection: {e}") if refresh: @@ -471,14 +463,12 @@ def disable(self, refresh: bool = True) -> None: """ self.logger.debug(f"Disabling {self.name}...") try: - self.saved_search.disable() # type: ignore + self.saved_search.disable() # type: ignore except HTTPError as e: raise ServerError(f"HTTP error encountered while disabling detection: {e}") if refresh: self.refresh() - # TODO: evaluate sane defaults here (e.g. 
3y is good now, but maybe not always...); NOTE also - # that contentctl may already be munging timestamps for us def update_timeframe( self, earliest_time: str = ScheduleConfig.EARLIEST_TIME.value, @@ -506,7 +496,7 @@ def update_timeframe( self.logger.info(data) self.logger.info(f"Updating timeframe for '{self.name}': {data}") try: - self.saved_search.update(**data) # type: ignore + self.saved_search.update(**data) # type: ignore except HTTPError as e: raise ServerError(f"HTTP error encountered while updating timeframe: {e}") @@ -535,16 +525,12 @@ def risk_event_exists(self) -> bool: Queries the `risk` index and returns True if a risk event exists :return: a bool indicating whether a risk event exists in the risk index """ - # TODO: make this a more specific query based on the search in question (and update the docstring to relfect - # when you do) # construct our query and issue our search job on the risk index query = "search index=risk | head 1" result_iterator = self._search(query) try: for result in result_iterator: # we return True if we find at least one risk object - # TODO: re-evaluate this condition; we may want to look for multiple risk objects on different - # entitities # (e.g. users vs systems) and we may want to do more confirmational testing if result["index"] == Indexes.RISK_INDEX.value: self.logger.debug( @@ -562,8 +548,6 @@ def notable_event_exists(self) -> bool: Queries the `notable` index and returns True if a risk event exists :return: a bool indicating whether a risk event exists in the risk index """ - # TODO: make this a more specific query based on the search in question (and update the docstring to reflect - # when you do) # construct our query and issue our search job on the risk index query = "search index=notable | head 1" result_iterator = self._search(query) @@ -721,7 +705,7 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo wait_duration=elapsed_sleep_time, exception=e, ) - self.logger.exception(f"{result.status.name}: {result.message}") # type: ignore + self.logger.exception(f"{result.status.name}: {result.message}") # type: ignore else: raise e @@ -795,7 +779,6 @@ def _delete_index(self, index: str) -> None: message = f"No result returned showing deletion in index {index}" raise ServerError(message) - # TODO: ensure that ES is configured to use the default index set by contenctl def cleanup(self, delete_test_index=False) -> None: """Cleans up after an integration test @@ -814,7 +797,7 @@ def cleanup(self, delete_test_index=False) -> None: # delete the indexes if delete_test_index: - self.indexes_to_purge.add(self.test_index) # type: ignore + self.indexes_to_purge.add(self.test_index) # type: ignore for index in self.indexes_to_purge: self._delete_index(index) self.indexes_to_purge.clear() diff --git a/contentctl/objects/integration_test.py b/contentctl/objects/integration_test.py index d3249b32..6aa15333 100644 --- a/contentctl/objects/integration_test.py +++ b/contentctl/objects/integration_test.py @@ -18,7 +18,6 @@ class IntegrationTest(BaseTest): # The test result result: Union[None, IntegrationTestResult] = None - # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? 
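# Illustrative sketch (editor's addition, not part of this patch): how the builder is
# expected to use this class -- derive an IntegrationTest from an existing UnitTest and,
# when integration testing is disabled, skip it up front the way
# detection_builder.skipIntegrationTests() does. The helper name is an assumption; skip()
# is assumed to populate `result` with a SKIP status, which the test runner checks later.
from contentctl.objects.unit_test import UnitTest
from contentctl.objects.integration_test import IntegrationTest


def build_integration_test(unit_test: UnitTest, skip_all: bool = False) -> IntegrationTest:
    integration_test = IntegrationTest.derive_from_unit_test(unit_test)
    if skip_all:
        integration_test.skip("TEST SKIPPED: Skipping all integration tests")
    return integration_test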
@classmethod def derive_from_unit_test(cls, unit_test: UnitTest) -> Self: """ diff --git a/contentctl/objects/integration_test_result.py b/contentctl/objects/integration_test_result.py index 8d2d8284..429a466f 100644 --- a/contentctl/objects/integration_test_result.py +++ b/contentctl/objects/integration_test_result.py @@ -4,5 +4,8 @@ class IntegrationTestResult(BaseTestResult): + """ + An integration test result + """ # the total time we slept waiting for the detection to fire after activating it wait_duration: Optional[int] = None diff --git a/contentctl/objects/test_config.py b/contentctl/objects/test_config.py index 8687fa5f..4cae8c94 100644 --- a/contentctl/objects/test_config.py +++ b/contentctl/objects/test_config.py @@ -491,8 +491,7 @@ class TestConfig(BaseModel, extra=Extra.forbid, validate_assignment=True): title="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk" " instance with ES installed)" ) - # TODO: add setting to skip listing skips - + diff --git a/contentctl/objects/test_group.py b/contentctl/objects/test_group.py index 2d80fe3f..56511d28 100644 --- a/contentctl/objects/test_group.py +++ b/contentctl/objects/test_group.py @@ -11,13 +11,17 @@ class TestGroup(BaseModel): """ Groups of different types of tests relying on the same attack data + :param name: Name of the TestGroup (typically derived from a unit test as + "{detection.name}:{test.name}") + :param unit_test: a UnitTest + :param integration_test: an IntegrationTest + :param attack_data: the attack data associated with tests in the TestGroup """ name: str unit_test: UnitTest integration_test: IntegrationTest attack_data: list[UnitTestAttackData] - # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? 
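# Illustrative sketch (editor's addition, not part of this patch): pairing each of a
# detection's unit tests into its own TestGroup so that attack data is replayed once per
# group rather than once per test. The "{detection.name}:{test.name}" naming convention and
# the shared attack_data come from the docstring above; `detection` is a stand-in object,
# the helper name is an assumption, and the sketch assumes it sits below the TestGroup
# class definition.
from contentctl.objects.unit_test import UnitTest


def build_test_groups(detection) -> list[TestGroup]:
    groups = []
    for test in detection.tests:
        # detection.tests may mix UnitTest and IntegrationTest entries; only unit tests seed
        # a TestGroup, which bundles that unit test, a derived integration test, and their
        # shared attack data
        if isinstance(test, UnitTest):
            groups.append(TestGroup.derive_from_unit_test(test, name_prefix=detection.name))
    return groups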
@classmethod def derive_from_unit_test(cls, unit_test: UnitTest, name_prefix: str) -> Self: """ From 3d288618fd85a1c51a6cb1e8a73cff2a087b0e4c Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 14:13:51 -0800 Subject: [PATCH 12/22] Consolidated logic around success vs state (success is now a property) Ensured status is always applied and success is derived from status Skip is now considered a success Removed some extraneous writes Updated set_job_content to work w/ status Added saved_search link for integration test reporting Consolidated logic for detecting when to pause for a user into a separate function Tests w/o result get an error result added Unit fails/errors now propagate to unit tests as a fail/error instead of a skip Missing fields are included as None instead of being excluded entirely from YAML Removed unit_test_test.py (not used) --- .../DetectionTestingInfrastructure.py | 231 ++++++++++++------ .../actions/detection_testing/progress_bar.py | 7 +- .../views/DetectionTestingViewWeb.py | 3 +- .../detection_abstract.py | 34 ++- contentctl/objects/base_test.py | 2 +- contentctl/objects/base_test_result.py | 60 +++-- contentctl/objects/correlation_search.py | 23 +- contentctl/objects/integration_test_result.py | 24 ++ contentctl/objects/unit_test_result.py | 38 +-- contentctl/objects/unit_test_test.py | 18 -- 10 files changed, 281 insertions(+), 159 deletions(-) delete mode 100644 contentctl/objects/unit_test_test.py diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 3a7e9c18..2969250b 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -25,10 +25,11 @@ from contentctl.objects.enums import PostTestBehavior from contentctl.objects.detection import Detection +from contentctl.objects.base_test import BaseTest from contentctl.objects.unit_test import UnitTest +from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.unit_test_attack_data import UnitTestAttackData from contentctl.objects.unit_test_result import UnitTestResult -from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.integration_test_result import IntegrationTestResult from contentctl.objects.test_config import TestConfig, Infrastructure from contentctl.objects.test_group import TestGroup @@ -46,27 +47,19 @@ ) # DO THE FOLLOWING BEFORE REVIEW +# TODO: handle correlation type tests +# TODO: test known failures/successes # TODO: look into getting better time elapsed updates on pbar statuses # TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the # completed/elapsed counter) -# TODO: test known failures/successes # TODO: do a full package test -# TODO: validate duration, wait_duration, runDuration -# TODO: consolidate logic around success vs. status across unit and integration tests -> if we -# force the setting of a status, maybe make success a property instead of a field?; ensure that -# all invocations of test results have status set AND consider if SKIP should be success as True -# or False -# TODO: for Detection.all_tests_successful, if all of a detections tests are skipped, it will -# return True; is this desirable? 
We definitely don't want skips interspersed w/ passes to cause -# a failure overall - - # TODO: look into how notable/risk index clearing surfaces to contentctl as an error (if at all) # TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long # cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be # testing on Correlation type detections anyway # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores +# TODO: set correlation_search.ENABLE_LOGGING to False # LIST THE FOLLOWING AS PART OF THE REVIEW # TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly @@ -89,14 +82,18 @@ # that all the logic is encapsulated w/in _parse_risk_and_notable_actions but that is a # pydantic v2 feature (see the init validators for risk/notable actions): # https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init -# TODO: evaluate sane defaults for timeframe for integration testing (e.g. 3y is good now, but +# TODO: evaluate sane defaults for timeframe for integration testing (e.g. 3y is good now, but # maybe not always...); NOTE also that contentctl may already be munging timestamps for us -# TODO: make the search for risk/notable events a more specific query based on the search in +# TODO: make the search for risk/notable events a more specific query based on the search in # question (and update the docstring to relfect when you do) +# TODO: break up the execute routines for integration/unit tests some more to remove code w/ +# similar structure +# TODO: skip docker and TA ext pull for custom infrastructure # GENERAL CURIOSITY # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? + class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None success: bool = True @@ -514,11 +511,6 @@ def test_detection(self, detection: Detection) -> None: 2 ) - self.pbar.write(f"unit_test.duration: {test_group.unit_test.result.duration}") - self.pbar.write(f"integration_test.duration: {test_group.integration_test.result.duration}") - self.pbar.write(f"setup_results.duration: {setup_results.duration}") - self.pbar.write(f"cleanup_results.duration: {cleanup_results.duration}") - # Write test group status self.pbar.write( self.format_pbar_string( @@ -539,7 +531,6 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: """ # Capture the setup start time setup_start_time = time.time() - self.pbar.write(f"setup_start_time: {setup_start_time}") # Log the start of the test group self.pbar.reset() @@ -560,6 +551,7 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: try: self.replay_attack_data_files(test_group, setup_start_time) except Exception as e: + print("\n\nexception replaying attack data files\n\n") results.exception = e results.success = False @@ -686,11 +678,11 @@ def execute_unit_test( # if the replay failed, record the test failure and return if not setup_results.success: - print("\n\nexception replaying attack data files\n\n") test.result = UnitTestResult() test.result.set_job_content( None, self.infrastructure, + TestResultStatus.ERROR, exception=setup_results.exception, duration=time.time() - test_start_time ) @@ -734,29 +726,23 @@ def execute_unit_test( print("\n\nexception trying search until timeout\n\n") test.result = UnitTestResult() test.result.set_job_content( - None, self.infrastructure, exception=e, duration=time.time() - test_start_time + None, + self.infrastructure, + 
TestResultStatus.ERROR, + exception=e, + duration=time.time() - test_start_time ) # Pause here if the terminate flag has NOT been set AND either of the below are true: # 1. the behavior is always_pause # 2. the behavior is pause_on_failure and the test failed - if ( - self.global_config.post_test_behavior == PostTestBehavior.always_pause - or ( - self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure - and (test.result is None or test.result.success is False) - ) - ) and not self.sync_obj.terminate: - # Assume failure - res = "FAIL" - + if self.pause_for_user(test): # Determine the state to report to the user if test.result is None: res = "ERROR" link = detection.search else: - if test.result.success: - res = "PASS" + res = test.result.status.value.upper() link = test.result.get_summary_dict()["sid_link"] self.format_pbar_string( @@ -772,8 +758,17 @@ def execute_unit_test( except Exception: pass - # Report success - if test.result is not None and test.result.success: + # Treat the case where no result is created as an error + if test.result is None: + message = "TEST ERROR: No result generated during testing" + test.result = UnitTestResult( + message=message, + exception=ValueError(message), + status=TestResultStatus.ERROR + ) + + # Report a pass + if test.result.status == TestResultStatus.PASS: self.pbar.write( self.format_pbar_string( TestReportingType.UNIT, @@ -783,8 +778,19 @@ def execute_unit_test( set_pbar=False, ) ) - else: - # Report failure + elif test.result.status == TestResultStatus.SKIP: + # Report a skip + self.pbar.write( + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + FinalTestingStates.SKIP.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + elif test.result.status == TestResultStatus.FAIL: + # Report a FAIL self.pbar.write( self.format_pbar_string( TestReportingType.UNIT, @@ -794,13 +800,27 @@ def execute_unit_test( set_pbar=False, ) ) + elif test.result.status == TestResultStatus.ERROR: + # Report an ERROR + self.pbar.write( + self.format_pbar_string( + TestReportingType.UNIT, + f"{detection.name}:{test.name}", + FinalTestingStates.ERROR.value, + test_start_time, + set_pbar=False, + ) + ) + else: + # Status was None or some other unexpected value + raise ValueError( + f"Status for (unit) '{detection.name}:{test.name}' was an unexpected" + f"value: {test.result.status}" + ) - # Flush stdout for pbar + # Flush stdout and set duration stdout.flush() - - # Add duration - if test.result is not None: - test.result.duration = round(time.time() - test_start_time, 2) + test.result.duration = round(time.time() - test_start_time, 2) def execute_integration_test( self, @@ -816,19 +836,38 @@ def execute_integration_test( # Capture unit test start time test_start_time = time.time() - # First, check to see if the unit test failed; skip integration testing if so - if (unit_test_result is not None) and (unit_test_result.status == TestResultStatus.FAIL): - test.skip("TEST SKIPPED: associated unit test failed") + # First, check to see if the unit test failed; preemptively fail integration testing if so + if unit_test_result is not None: + # check status is set (complete) and if failed (FAIL/ERROR) + if unit_test_result.complete and unit_test_result.failed: + test.result = IntegrationTestResult( + message="TEST FAILED (preemptive): associated unit test failed or encountered an error", + exception=unit_test_result.exception, + status=unit_test_result.status, + ) - # Next, check to see if this test has been skipped 
(just now or elsewhere); log and return - # if so - if test.result is not None and test.result.status == TestResultStatus.SKIP: - # report the skip to the CLI + # Next, check to see if this test has already had its status set (just now or elsewhere); + # log and return if so + if (test.result is not None) and test.result.complete: + # Determine the reporting state (we should only encounter SKIP/FAIL/ERROR) + state: str + if test.result.status == TestResultStatus.SKIP: + state = FinalTestingStates.SKIP.value + elif test.result.status == TestResultStatus.FAIL: + state = FinalTestingStates.FAIL.value + elif test.result.status == TestResultStatus.ERROR: + state = FinalTestingStates.ERROR.value + else: + raise ValueError( + f"Status for (integration) '{detection.name}:{test.name}' was preemptively set" + f"to an unexpected value: {test.result.status}" + ) + # report the status to the CLI self.pbar.write( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.SKIP.value, + state, start_time=test_start_time, set_pbar=False, ) @@ -846,16 +885,11 @@ def execute_integration_test( # if the replay failed, record the test failure and return if not setup_results.success: - status: TestResultStatus - if setup_results.exception is not None: - status = TestResultStatus.ERROR - else: - status = TestResultStatus.FAIL test.result = IntegrationTestResult( - message="TEST FAILED: something went wrong during during TestGroup setup (e.g. attack data replay)", + message="TEST FAILED (ERROR): something went wrong during during TestGroup setup (e.g. attack data replay)", exception=setup_results.exception, duration=round(time.time() - test_start_time, 2), - status=status + status=TestResultStatus.ERROR ) # report the failure to the CLI @@ -900,23 +934,20 @@ def execute_integration_test( # Pause here if the terminate flag has NOT been set AND either of the below are true: # 1. the behavior is always_pause # 2. 
the behavior is pause_on_failure and the test failed - if ( - self.global_config.post_test_behavior == PostTestBehavior.always_pause - or ( - self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure - and (test.result is None or test.result.failed_and_complete()) - ) - ) and not self.sync_obj.terminate: + if self.pause_for_user(test): # Determine the state to report to the user if test.result is None: res = "ERROR" else: res = test.result.status.value.upper() + # Get the link to the saved search in this specific instance + link = test.result.get_saved_search_url(self.infrastructure) + self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - f"{res} (CTRL+D to continue)", + f"{res} - {link} (CTRL+D to continue)", test_start_time, ) @@ -926,8 +957,17 @@ def execute_integration_test( except Exception: pass + # Treat the case where no result is created as an error + if test.result is None: + message = "TEST ERROR: No result generated during testing" + test.result = IntegrationTestResult( + message=message, + exception=ValueError(message), + status=TestResultStatus.ERROR + ) + # Report a pass - if (test.result is not None) and (test.result.status == TestResultStatus.PASS): + if test.result.status == TestResultStatus.PASS: self.pbar.write( self.format_pbar_string( TestReportingType.INTEGRATION, @@ -937,7 +977,7 @@ def execute_integration_test( set_pbar=False, ) ) - elif (test.result is not None) and (test.result.status == TestResultStatus.SKIP): + elif test.result.status == TestResultStatus.SKIP: # Report a skip self.pbar.write( self.format_pbar_string( @@ -948,8 +988,8 @@ def execute_integration_test( set_pbar=False, ) ) - else: - # report the failure to the CLI + elif test.result.status == TestResultStatus.FAIL: + # report a FAIL self.pbar.write( self.format_pbar_string( TestReportingType.INTEGRATION, @@ -959,11 +999,47 @@ def execute_integration_test( set_pbar=False, ) ) + elif test.result.status == TestResultStatus.ERROR: + # report an ERROR + self.pbar.write( + self.format_pbar_string( + TestReportingType.INTEGRATION, + f"{detection.name}:{test.name}", + FinalTestingStates.ERROR.value, + start_time=test_start_time, + set_pbar=False, + ) + ) + else: + # Status was None or some other unexpected value + raise ValueError( + f"Status for (integration) '{detection.name}:{test.name}' was an " + f"unexpected value: {test.result.status}" + ) # Flush stdout and set duration stdout.flush() - if test.result is not None: - test.result.duration = round(time.time() - test_start_time, 2) + test.result.duration = round(time.time() - test_start_time, 2) + + def pause_for_user(self, test: BaseTest) -> bool: + """ + Returns true if test execution should pause for user investigation + :param test: the test instance + :returns: bool where True indicates we should pause for the user + """ + # Ensure the worker has not been terminated + if not self.sync_obj.terminate: + # check if the behavior is to always pause + if self.global_config.post_test_behavior == PostTestBehavior.always_pause: + return True + elif self.global_config.post_test_behavior == PostTestBehavior.pause_on_failure: + # If the behavior is to pause on failure, check for failure (either explicitly, or + # just a lack of a result) + if test.result is None or test.result.failed: + return True + + # Otherwise, don't pause + return False def retry_search_until_timeout( self, @@ -1052,10 +1128,10 @@ def retry_search_until_timeout( test.result.set_job_content( job.content, self.infrastructure, + 
TestResultStatus.ERROR, exception=e, - success=False, duration=time.time() - search_start_time, - ) + ) return @@ -1073,10 +1149,9 @@ def retry_search_until_timeout( test.result.set_job_content( job.content, self.infrastructure, - success=True, + TestResultStatus.PASS, duration=time.time() - search_start_time, ) - self.pbar.write(f"runDuration: {test.result.job_content.runDuration}") return else: @@ -1090,8 +1165,8 @@ def retry_search_until_timeout( test.result.set_job_content( job.content, self.infrastructure, + TestResultStatus.ERROR, exception=e, - success=False, duration=time.time() - search_start_time, ) @@ -1103,7 +1178,7 @@ def retry_search_until_timeout( test.result.set_job_content( job.content, self.infrastructure, - success=False, + TestResultStatus.FAIL, duration=time.time() - search_start_time, ) tick += 1 diff --git a/contentctl/actions/detection_testing/progress_bar.py b/contentctl/actions/detection_testing/progress_bar.py index a0fa48e5..45e30e06 100644 --- a/contentctl/actions/detection_testing/progress_bar.py +++ b/contentctl/actions/detection_testing/progress_bar.py @@ -47,9 +47,10 @@ class FinalTestingStates(str, Enum): """ The possible final states for a test (for pbar reporting) """ - FAIL = "\x1b[0;30;41m" + "FAIL".ljust(LONGEST_STATE) + "\x1b[0m" - PASS = "\x1b[0;30;42m" + "PASS".ljust(LONGEST_STATE) + "\x1b[0m" - SKIP = "\x1b[0;30;47m" + "SKIP".ljust(LONGEST_STATE) + "\x1b[0m" + FAIL = "\x1b[0;30;41m" + "FAIL ".ljust(LONGEST_STATE) + "\x1b[0m" + ERROR = "\x1b[0;30;41m" + "ERROR".ljust(LONGEST_STATE) + "\x1b[0m" + PASS = "\x1b[0;30;42m" + "PASS ".ljust(LONGEST_STATE) + "\x1b[0m" + SKIP = "\x1b[0;30;47m" + "SKIP ".ljust(LONGEST_STATE) + "\x1b[0m" # max length of a test name diff --git a/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py b/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py index fe1289ae..5e2e46c0 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py @@ -1,9 +1,8 @@ -from bottle import route, run, template, Bottle, ServerAdapter +from bottle import template, Bottle, ServerAdapter from contentctl.actions.detection_testing.views.DetectionTestingView import ( DetectionTestingView, ) -from contentctl.objects.unit_test_result import UnitTestResult from wsgiref.simple_server import make_server, WSGIRequestHandler import jinja2 import webbrowser diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 00935cd1..bb0322ac 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -13,7 +13,6 @@ from contentctl.objects.config import ConfigDetectionConfiguration from contentctl.objects.unit_test import UnitTest from contentctl.objects.integration_test import IntegrationTest -from contentctl.objects.base_test_result import TestResultStatus from contentctl.objects.macro import Macro from contentctl.objects.lookup import Lookup from contentctl.objects.baseline import Baseline @@ -23,7 +22,7 @@ class Detection_Abstract(SecurityContentObject): - #contentType: SecurityContentType = SecurityContentType.detections + # contentType: SecurityContentType = SecurityContentType.detections type: AnalyticsType = ... file_path: str = None # status field is REQUIRED (the way to denote this with pydantic is ...) 
@@ -183,16 +182,35 @@ def datamodel_valid(cls, v, values): return v def all_tests_successful(self) -> bool: + """ + Checks that all tests in the detection succeeded. If no tests are defined, consider that a + failure; if any test fails (FAIL, ERROR), consider that a failure; if any test has + no result or no status, consider that a failure. If all tests succeed (PASS, SKIP), consider + the detection a success + :returns: bool where True indicates all tests succeeded (they existed, complete and were + PASS/SKIP) + """ + # If no tests are defined, we consider it a failure for the detection if len(self.tests) == 0: return False - for test in self.tests: - # Ignore any skipped tests - if (test.result is not None) and (test.result.status == TestResultStatus.SKIP): - continue - # If any result is missing or if any has a failure, the return False - if test.result is None or test.result.success is False: + # Iterate over tests + for test in self.tests: + # Check that test.result is not None + if test.result is not None: + # Check status is set (complete) + if test.result.complete: + # Check for failure (FAIL, ERROR) + if test.result.failed: + return False + else: + # If no status, return False + return False + else: + # If no result, return False return False + + # If all tests are successful (PASS/SKIP), return True return True def get_summary( diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index 7927089f..06a6ed1a 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -38,7 +38,7 @@ class BaseTest(BaseModel, ABC): result: Union[None, BaseTestResult] = None @abstractmethod - def skip(self) -> None: + def skip(self, message: str) -> None: """ Skip a test """ diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index cb2c99e0..e968d6df 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -1,7 +1,7 @@ from typing import Union from enum import Enum -from pydantic import BaseModel, validator +from pydantic import BaseModel from splunklib.data import Record from contentctl.helper.utils import Utils @@ -18,7 +18,8 @@ class TestResultStatus(str, Enum): # Test skipped (nothing testable at present) SKIP = "skip" - # Error/exception encountered during testing (e.g. network error) + # Error/exception encountered during testing (e.g. network error); considered a failure as well + # for the purpose of aggregate metric gathering ERROR = "error" def __str__(self) -> str: @@ -38,9 +39,6 @@ class BaseTestResult(BaseModel): # The status (PASS, FAIL, SKIP, ERROR) status: Union[TestResultStatus, None] = None - # Whether the test passed (should only be True when status==PASS) - success: bool = False - # The duration of the test in seconds duration: float = 0 @@ -56,33 +54,39 @@ class Config: # Needed to allow for embedding of Exceptions in the model arbitrary_types_allowed = True - @validator("success", always=True) - @classmethod - def derive_success_from_status(cls, v, values) -> bool: + # TODO: add validator which makes a lack of exception incompatible with status ERROR + + @property + def passed(self) -> bool: """ - If a status is provided at initialization, we can derive success from it + Property returning True if status is PASS; False otherwise (SKIP, FAIL, ERROR). 
+ :returns: bool indicating success/failure """ - # If a status is provided an initialization, derive success - if ("status" in values) and (values["status"] is not None): - # Success is True only if status is PASS - if values["status"] == TestResultStatus.PASS: - return True - else: - if v is not False: - raise ValueError(f"Status {values['status'].value} is not compatible with success={v}") - return False - return v + return self.status == TestResultStatus.PASS + + @property + def success(self) -> bool: + """ + Property returning True if status is PASS or SKIP; False otherwise (FAIL, ERROR). + :returns: bool indicating success/failure + """ + return self.status in [TestResultStatus.PASS, TestResultStatus.SKIP] @property - def failed_and_complete(self) -> bool: + def failed(self) -> bool: """ - Uses status to determine if a test was a failure; useful because success == False for a SKIP, but it is also - not a failure - :returns: bool indicating the test failed and is complete (in that it has a status) + Property returning True if status is FAIL or ERROR; False otherwise (PASS, SKIP) + :returns: bool indicating fialure if True """ - if self.status is not None: - return self.status == TestResultStatus.FAIL or self.status == TestResultStatus.ERROR - return False + return self.status == TestResultStatus.FAIL or self.status == TestResultStatus.ERROR + + @property + def complete(self) -> bool: + """ + Property returning True when a test is complete (i.e. the result has been given a status) + :returns: bool indicating the test is complete (has a status) if True + """ + return self.status is not None def get_summary_dict( self, @@ -110,6 +114,10 @@ def get_summary_dict( summary_dict[field] = str(getattr(self, field)) else: summary_dict[field] = getattr(self, field) + else: + # If field can't be found, set it to None (useful as unit and integration tests have + # small differences in the number of fields they share) + summary_dict[field] = None # Grab the job content fields required for field in job_fields: diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 2153aea9..b72d0409 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -620,10 +620,15 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo # skip test if no risk or notable action defined if not self.has_risk_analysis_action and not self.has_notable_action: message = ( - f"No risk analysis or notable Adaptive Response actions defined for '{self.name}'; skipping " - "integration test" + f"TEST SKIPPED: No risk analysis or notable Adaptive Response actions defined " + f"for '{self.name}'; skipping integration test" + ) + result = IntegrationTestResult( + message=message, + status=TestResultStatus.SKIP, + wait_duration=0, + saved_search_path=self.saved_search.path, ) - result = IntegrationTestResult(message=message, status=TestResultStatus.SKIP, wait_duration=0) else: # force the detection to run self.logger.info(f"Forcing a run on {self.name}") @@ -653,8 +658,9 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo if not self.risk_event_exists(): result = IntegrationTestResult( status=TestResultStatus.FAIL, - message=f"No matching risk event created for '{self.name}'", + message=f"TEST FAILED: No matching risk event created for '{self.name}'", wait_duration=elapsed_sleep_time, + saved_search_path=self.saved_search.path, ) else: self.indexes_to_purge.add(Indexes.RISK_INDEX.value) @@ 
-668,8 +674,9 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo # adding more descriptive test results result = IntegrationTestResult( status=TestResultStatus.FAIL, - message=f"No matching notable event created for '{self.name}'", + message=f"TEST FAILED: No matching notable event created for '{self.name}'", wait_duration=elapsed_sleep_time, + saved_search_path=self.saved_search.path, ) else: self.indexes_to_purge.add(Indexes.NOTABLE_INDEX.value) @@ -678,8 +685,9 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo if result is None: result = IntegrationTestResult( status=TestResultStatus.PASS, - message=f"Expected risk and/or notable events were created for '{self.name}'", + message=f"TEST PASSED: Expected risk and/or notable events were created for: {self.name}", wait_duration=elapsed_sleep_time, + saved_search_path=self.saved_search.path, ) break @@ -701,9 +709,10 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo if not raise_on_exc: result = IntegrationTestResult( status=TestResultStatus.ERROR, - message=f"Exception raised during integration test: {e}", + message=f"TEST FAILED (ERROR): Exception raised during integration test: {e}", wait_duration=elapsed_sleep_time, exception=e, + saved_search_path=self.saved_search.path, ) self.logger.exception(f"{result.status.name}: {result.message}") # type: ignore else: diff --git a/contentctl/objects/integration_test_result.py b/contentctl/objects/integration_test_result.py index 429a466f..5e6068b4 100644 --- a/contentctl/objects/integration_test_result.py +++ b/contentctl/objects/integration_test_result.py @@ -1,11 +1,35 @@ from typing import Optional +from contentctl.objects.test_config import Infrastructure from contentctl.objects.base_test_result import BaseTestResult +SAVED_SEARCH_TEMPLATE = "{server}:{web_port}/en-US/{path}" + + class IntegrationTestResult(BaseTestResult): """ An integration test result """ # the total time we slept waiting for the detection to fire after activating it wait_duration: Optional[int] = None + + # the path to the saved search on the Splunk endpoint + saved_search_path: Optional[str] = None + + def get_saved_search_url(self, infra: Infrastructure): + """ + Given an Infrastructure config, return the link to the saved search (detection) we are + testing + :param infra: an Infrastructure config + :returns: str, the URL to the saved search + """ + # If the path was not set for some reason, just return the base URL + path = "" + if self.saved_search_path is not None: + path = self.saved_search_path + return SAVED_SEARCH_TEMPLATE.format( + server=infra.instance_address, + web_port=infra.web_ui_port, + path=path, + ) diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py index 15b3f103..a87e12e3 100644 --- a/contentctl/objects/unit_test_result.py +++ b/contentctl/objects/unit_test_result.py @@ -18,23 +18,38 @@ def set_job_content( self, content: Union[Record, None], config: Infrastructure, + status: TestResultStatus, exception: Union[Exception, None] = None, - success: bool = False, duration: float = 0, - ): - # Set duration, exception and success + ) -> bool: + """ + Sets various fields in the result, pulling some fields from the provided search job's + content + :param content: search job content + :param config: the Infrastructure config + :param status: the test status (TestResultStatus) + :param exception: an Exception raised during the test (may be None) + :param duration: the 
overall duration of the test, including data replay and cleanup time + (float, in seconds) + :returns: bool indicating test success (inclusive of PASS and SKIP) + """ + # Set duration, exception and status self.duration = round(duration, 2) self.exception = exception - self.success = success + self.status = status # Set the job content, if given if content is not None: self.job_content = content - if success: + if self.status == TestResultStatus.PASS: self.message = "TEST PASSED" - else: + elif self.status == TestResultStatus.FAIL: self.message = "TEST FAILED" + elif self.status == TestResultStatus.ERROR: + self.message = "TEST FAILED (ERROR)" + elif self.status == TestResultStatus.SKIP: + self.message = "TEST SKIPPED" if not config.instance_address.startswith("http://"): sid_template = f"http://{SID_TEMPLATE}" @@ -49,18 +64,9 @@ def set_job_content( # TODO: this error message seems not the most helpful, since content must be None for it to be set elif content is None: self.job_content = None - self.success = False + self.status = TestResultStatus.ERROR if self.exception is not None: self.message = f"EXCEPTION: {str(self.exception)}" self.sid_link = NO_SID - # Set status if the test was not already skipped - if self.status != TestResultStatus.SKIP: - if self.exception is not None: - self.status = TestResultStatus.ERROR - elif not self.success: - self.status = TestResultStatus.FAIL - else: - self.status = TestResultStatus.PASS - return self.success diff --git a/contentctl/objects/unit_test_test.py b/contentctl/objects/unit_test_test.py deleted file mode 100644 index abdd7b59..00000000 --- a/contentctl/objects/unit_test_test.py +++ /dev/null @@ -1,18 +0,0 @@ -from pydantic import BaseModel, validator, ValidationError - - -from contentctl.objects.unit_test_attack_data import UnitTestAttackData -from contentctl.objects.unit_test_baseline import UnitTestBaseline -from contentctl.objects.unit_test_result import UnitTestResult -from typing import Union - - -class UnitTestTest(BaseModel): - name: str - file: str - pass_condition: Union[str, None] = None - earliest_time: Union[str, None] = None - latest_time: Union[str, None] = None - baselines: list[UnitTestBaseline] = [] - attack_data: list[UnitTestAttackData] - result: Union[None, UnitTestResult] From 30aae006bfc4c5ace9000f6630a5f8b62fcfd5f5 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 14:20:56 -0800 Subject: [PATCH 13/22] Fixing lack of Self in python 3.9 --- contentctl/objects/integration_test.py | 4 ++-- contentctl/objects/test_group.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/contentctl/objects/integration_test.py b/contentctl/objects/integration_test.py index 6aa15333..0b0b57ec 100644 --- a/contentctl/objects/integration_test.py +++ b/contentctl/objects/integration_test.py @@ -1,4 +1,4 @@ -from typing import Union, Self +from typing import Union from pydantic import Field @@ -19,7 +19,7 @@ class IntegrationTest(BaseTest): result: Union[None, IntegrationTestResult] = None @classmethod - def derive_from_unit_test(cls, unit_test: UnitTest) -> Self: + def derive_from_unit_test(cls, unit_test: UnitTest) -> "IntegrationTest": """ Given a UnitTest, construct an IntegrationTest :param unit_test: the UnitTest diff --git a/contentctl/objects/test_group.py b/contentctl/objects/test_group.py index 56511d28..abf71566 100644 --- a/contentctl/objects/test_group.py +++ b/contentctl/objects/test_group.py @@ -1,5 +1,3 @@ -from typing import Self - from pydantic import BaseModel from 
contentctl.objects.unit_test import UnitTest @@ -23,7 +21,7 @@ class TestGroup(BaseModel): attack_data: list[UnitTestAttackData] @classmethod - def derive_from_unit_test(cls, unit_test: UnitTest, name_prefix: str) -> Self: + def derive_from_unit_test(cls, unit_test: UnitTest, name_prefix: str) -> "TestGroup": """ Given a UnitTest and a prefix, construct a TestGroup, with in IntegrationTest corresponding to the UnitTest :param unit_test: the UnitTest From dfc40874f4bfb188b18b081541fa6015a3d248e8 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 14:41:12 -0800 Subject: [PATCH 14/22] avoid nonetype referencing --- contentctl/input/director.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/contentctl/input/director.py b/contentctl/input/director.py index 4379bc17..075fddb5 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -207,18 +207,19 @@ def constructDetection(self, builder: DetectionBuilder, file_path: str) -> None: builder.addPlaybook(self.output_dto.playbooks) builder.addMacros(self.output_dto.macros) builder.addLookups(self.output_dto.lookups) - + if self.input_dto.config.enrichments.attack_enrichment: builder.addMitreAttackEnrichment(self.attack_enrichment) if self.input_dto.config.enrichments.cve_enrichment: builder.addCve() - + if self.input_dto.config.enrichments.splunk_app_enrichment: builder.addSplunkApp() # Skip all integration tests if configured to do so - if not self.input_dto.config.test.enable_integration_testing: + # TODO: is there a better way to handle this? The `test` portion of the config is not defined for validate + if (self.input_dto.config.test is not None) and (not self.input_dto.config.test.enable_integration_testing): builder.skipIntegrationTests() From d7ac1acbc857a12250795a26591e390be04f5aeb Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 16:49:15 -0800 Subject: [PATCH 15/22] added support for skipping hunting/corellation for integrtation testing --- .../DetectionTestingInfrastructure.py | 29 +++++++++++++------ .../detection_abstract.py | 1 + contentctl/objects/correlation_search.py | 8 ++--- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 2969250b..a0ec9f1f 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -23,7 +23,7 @@ from urllib3 import disable_warnings import urllib.parse -from contentctl.objects.enums import PostTestBehavior +from contentctl.objects.enums import PostTestBehavior, AnalyticsType from contentctl.objects.detection import Detection from contentctl.objects.base_test import BaseTest from contentctl.objects.unit_test import UnitTest @@ -47,19 +47,16 @@ ) # DO THE FOLLOWING BEFORE REVIEW -# TODO: handle correlation type tests # TODO: test known failures/successes # TODO: look into getting better time elapsed updates on pbar statuses -# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the -# completed/elapsed counter) # TODO: do a full package test # TODO: look into how notable/risk index clearing surfaces to contentctl as an error (if at all) -# TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long -# cleanup/upload? 
retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be -# testing on Correlation type detections anyway +# TODO: test integration testing link on pause +# TODO: test failure propagation (integration) # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores # TODO: set correlation_search.ENABLE_LOGGING to False +# TODO: review # LIST THE FOLLOWING AS PART OF THE REVIEW # TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly @@ -89,10 +86,17 @@ # TODO: break up the execute routines for integration/unit tests some more to remove code w/ # similar structure # TODO: skip docker and TA ext pull for custom infrastructure +# TODO: consider listing Correlation type detections as skips rather than excluding them from +# results altogether? +# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the +# completed/elapsed counter) +# TODO: add flag around behavior to propagate unit failures to integration # GENERAL CURIOSITY # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? - +# TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long +# cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be +# testing on Correlation type detections anyway class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None @@ -836,7 +840,14 @@ def execute_integration_test( # Capture unit test start time test_start_time = time.time() - # First, check to see if the unit test failed; preemptively fail integration testing if so + # First, check to see if the test should be skipped (Hunting or Correlation) + if detection.type in [AnalyticsType.Hunting.value, AnalyticsType.Correlation.value]: + test.skip( + f"TEST SKIPPED: detection is type {detection.type} and cannot be integration " + "tested at this time" + ) + + # Next, check to see if the unit test failed; preemptively fail integration testing if so if unit_test_result is not None: # check status is set (complete) and if failed (FAIL/ERROR) if unit_test_result.complete and unit_test_result.failed: diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index bb0322ac..d7a6b9a3 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -23,6 +23,7 @@ class Detection_Abstract(SecurityContentObject): # contentType: SecurityContentType = SecurityContentType.detections + # NOTE: because `use_enum_values` is configured, this will actually be type str type: AnalyticsType = ... file_path: str = None # status field is REQUIRED (the way to denote this with pydantic is ...) 
diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index b72d0409..0f5e3f15 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -620,8 +620,8 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo # skip test if no risk or notable action defined if not self.has_risk_analysis_action and not self.has_notable_action: message = ( - f"TEST SKIPPED: No risk analysis or notable Adaptive Response actions defined " - f"for '{self.name}'; skipping integration test" + f"TEST SKIPPED: No risk analysis or notable Adaptive Response actions defined; " + f"skipping integration test: {self.name}" ) result = IntegrationTestResult( message=message, @@ -658,7 +658,7 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo if not self.risk_event_exists(): result = IntegrationTestResult( status=TestResultStatus.FAIL, - message=f"TEST FAILED: No matching risk event created for '{self.name}'", + message=f"TEST FAILED: No matching risk event created for: {self.name}", wait_duration=elapsed_sleep_time, saved_search_path=self.saved_search.path, ) @@ -674,7 +674,7 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo # adding more descriptive test results result = IntegrationTestResult( status=TestResultStatus.FAIL, - message=f"TEST FAILED: No matching notable event created for '{self.name}'", + message=f"TEST FAILED: No matching notable event created for: {self.name}", wait_duration=elapsed_sleep_time, saved_search_path=self.saved_search.path, ) From 494e9a57dc4f8fb8d0edc4196d2874008f4f9dab Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 20:38:11 -0800 Subject: [PATCH 16/22] added TODOs; fixed link reporting for integration testing --- .../DetectionTestingInfrastructure.py | 12 +++++------ contentctl/objects/correlation_search.py | 5 ----- contentctl/objects/integration_test_result.py | 20 ------------------- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index a0ec9f1f..10304ff9 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -48,15 +48,11 @@ # DO THE FOLLOWING BEFORE REVIEW # TODO: test known failures/successes -# TODO: look into getting better time elapsed updates on pbar statuses # TODO: do a full package test -# TODO: look into how notable/risk index clearing surfaces to contentctl as an error (if at all) -# TODO: test integration testing link on pause -# TODO: test failure propagation (integration) # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores # TODO: set correlation_search.ENABLE_LOGGING to False -# TODO: review +# TODO: review TODOs # LIST THE FOLLOWING AS PART OF THE REVIEW # TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly @@ -90,7 +86,9 @@ # results altogether? 
# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the # completed/elapsed counter) +# TODO: look into getting better time elapsed updates on pbar statuses # TODO: add flag around behavior to propagate unit failures to integration +# TODO: investigate 1-off count in "Completed X/X" # GENERAL CURIOSITY # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? @@ -852,7 +850,7 @@ def execute_integration_test( # check status is set (complete) and if failed (FAIL/ERROR) if unit_test_result.complete and unit_test_result.failed: test.result = IntegrationTestResult( - message="TEST FAILED (preemptive): associated unit test failed or encountered an error", + message="TEST FAILED (PREEMPTIVE): associated unit test failed or encountered an error", exception=unit_test_result.exception, status=unit_test_result.status, ) @@ -953,7 +951,7 @@ def execute_integration_test( res = test.result.status.value.upper() # Get the link to the saved search in this specific instance - link = test.result.get_saved_search_url(self.infrastructure) + link = f"https://{self.infrastructure.instance_address}:{self.infrastructure.web_ui_port}" self.format_pbar_string( TestReportingType.INTEGRATION, diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 0f5e3f15..626371ab 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -627,7 +627,6 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo message=message, status=TestResultStatus.SKIP, wait_duration=0, - saved_search_path=self.saved_search.path, ) else: # force the detection to run @@ -660,7 +659,6 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo status=TestResultStatus.FAIL, message=f"TEST FAILED: No matching risk event created for: {self.name}", wait_duration=elapsed_sleep_time, - saved_search_path=self.saved_search.path, ) else: self.indexes_to_purge.add(Indexes.RISK_INDEX.value) @@ -676,7 +674,6 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo status=TestResultStatus.FAIL, message=f"TEST FAILED: No matching notable event created for: {self.name}", wait_duration=elapsed_sleep_time, - saved_search_path=self.saved_search.path, ) else: self.indexes_to_purge.add(Indexes.NOTABLE_INDEX.value) @@ -687,7 +684,6 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo status=TestResultStatus.PASS, message=f"TEST PASSED: Expected risk and/or notable events were created for: {self.name}", wait_duration=elapsed_sleep_time, - saved_search_path=self.saved_search.path, ) break @@ -712,7 +708,6 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo message=f"TEST FAILED (ERROR): Exception raised during integration test: {e}", wait_duration=elapsed_sleep_time, exception=e, - saved_search_path=self.saved_search.path, ) self.logger.exception(f"{result.status.name}: {result.message}") # type: ignore else: diff --git a/contentctl/objects/integration_test_result.py b/contentctl/objects/integration_test_result.py index 5e6068b4..0460f633 100644 --- a/contentctl/objects/integration_test_result.py +++ b/contentctl/objects/integration_test_result.py @@ -13,23 +13,3 @@ class IntegrationTestResult(BaseTestResult): """ # the total time we slept waiting for the detection to fire after activating it wait_duration: Optional[int] = None - - # the path to the saved search on the 
Splunk endpoint - saved_search_path: Optional[str] = None - - def get_saved_search_url(self, infra: Infrastructure): - """ - Given an Infrastructure config, return the link to the saved search (detection) we are - testing - :param infra: an Infrastructure config - :returns: str, the URL to the saved search - """ - # If the path was not set for some reason, just return the base URL - path = "" - if self.saved_search_path is not None: - path = self.saved_search_path - return SAVED_SEARCH_TEMPLATE.format( - server=infra.instance_address, - web_port=infra.web_ui_port, - path=path, - ) From 1efb284f463a7f0dcebb72e303cf3590b1d6f07a Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Tue, 5 Dec 2023 20:40:35 -0800 Subject: [PATCH 17/22] TODOs --- .../infrastructures/DetectionTestingInfrastructure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 10304ff9..c4333f37 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -47,7 +47,6 @@ ) # DO THE FOLLOWING BEFORE REVIEW -# TODO: test known failures/successes # TODO: do a full package test # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores From 73b442f69c2a720300b58ee1a2445da0e735e8e5 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 7 Dec 2023 18:02:50 -0800 Subject: [PATCH 18/22] TODOs --- .../infrastructures/DetectionTestingInfrastructure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index c4333f37..3b4fa56a 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -47,6 +47,8 @@ ) # DO THE FOLLOWING BEFORE REVIEW +# TODO: Validate that the TestGroup methodology works across multiple instances +# TODO: consider overall detections success vs overall pass; also list skips # TODO: do a full package test # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores @@ -87,7 +89,7 @@ # completed/elapsed counter) # TODO: look into getting better time elapsed updates on pbar statuses # TODO: add flag around behavior to propagate unit failures to integration -# TODO: investigate 1-off count in "Completed X/X" +# TODO: investigate 1-off (occasionally 2-off?) count in "Completed X/X"; noticed it when testing against multiple instances, but not against a single instance # GENERAL CURIOSITY # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? 
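
Before the remaining patches, a minimal, self-contained sketch of the result-status model the commits above converge on (illustrative only, using simplified stand-in classes rather than the real contentctl models: PASS/SKIP count as success, FAIL/ERROR count as failure, and a missing status means the test never completed):

from enum import Enum
from typing import List, Optional


class TestResultStatus(str, Enum):
    PASS = "pass"
    FAIL = "fail"
    SKIP = "skip"
    ERROR = "error"


class SketchResult:
    """Simplified stand-in for BaseTestResult."""

    def __init__(self, status: Optional[TestResultStatus] = None) -> None:
        self.status = status

    @property
    def complete(self) -> bool:
        # a result is complete once any status has been assigned
        return self.status is not None

    @property
    def success(self) -> bool:
        # PASS and SKIP both count as success for aggregate reporting
        return self.status in (TestResultStatus.PASS, TestResultStatus.SKIP)

    @property
    def failed(self) -> bool:
        # FAIL and ERROR both count as failure
        return self.status in (TestResultStatus.FAIL, TestResultStatus.ERROR)


def all_tests_successful(results: List[SketchResult]) -> bool:
    # mirrors the detection-level aggregation: no tests, a missing status,
    # or any FAIL/ERROR means the detection fails overall
    if not results:
        return False
    return all(r.complete and not r.failed for r in results)


assert all_tests_successful([SketchResult(TestResultStatus.PASS), SketchResult(TestResultStatus.SKIP)])
assert not all_tests_successful([SketchResult(TestResultStatus.PASS), SketchResult(TestResultStatus.ERROR)])
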
From 37aad00869759e16dc44d5683f9f36748e4a08b7 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 13 Dec 2023 14:37:25 -0800 Subject: [PATCH 19/22] adding validator on message for better reporting to the user --- .../DetectionTestingInfrastructure.py | 45 +++++++++++++++---- contentctl/objects/risk_analysis_action.py | 29 +++++++++++- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 3b4fa56a..ee4d919d 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -47,16 +47,15 @@ ) # DO THE FOLLOWING BEFORE REVIEW -# TODO: Validate that the TestGroup methodology works across multiple instances -# TODO: consider overall detections success vs overall pass; also list skips -# TODO: do a full package test +# TODO: Adjust extracto # TODO: cleanup extra prints/logs # TODO: cleanup noqas/ignores # TODO: set correlation_search.ENABLE_LOGGING to False # TODO: review TODOs # LIST THE FOLLOWING AS PART OF THE REVIEW -# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly +# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly and how the relate to success (e.g. +# consider overall detections success vs overall pass) # TODO: make it clear to Eric that test durations will no longer account for # replay/cleanup in this new test grouping model (in the CLI reporting; the recorded # test duration in the result file will include the setup/cleanup time in the duration @@ -85,17 +84,47 @@ # TODO: skip docker and TA ext pull for custom infrastructure # TODO: consider listing Correlation type detections as skips rather than excluding them from # results altogether? -# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and the -# completed/elapsed counter) +# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and +# the completed/elapsed counter) # TODO: look into getting better time elapsed updates on pbar statuses # TODO: add flag around behavior to propagate unit failures to integration -# TODO: investigate 1-off (occasionally 2-off?) count in "Completed X/X"; noticed it when testing against multiple instances, but not against a single instance +# TODO: investigate 1-off (occasionally 2-off?) count in "Completed X/X"; noticed it when testing +# against multiple instances, but not against a single instance +# TODO: enforce distinct test names w/in detections +# TODO: add section to summary called "testwise_summary" listing per test metrics (e.g. total test, +# total tests passed, ...); also list num skipped at both detection and test level +# TODO: when in interactive mode, consider maybe making the cleanup routine in correlation_search +# happen after the user breaks the interactivity; currently risk/notable indexes are dumped +# before the user can inspect +# TODO: add more granular error messaging that can show success in finding a notable, but failure +# in finding a risk and vice-versa +# TODO: add logic which reports concretely that integration testing failed (or would fail) as a +# result of a missing victim observable +# TODO: Fix detection_abstract.tests_validate so that it surfaces validation errors (e.g. 
a lack of + # tests) to the final results, instead of looking like the following (maybe have a message + # propagated at the detection level? do a separate coverage check as part of validation?): + # - name: Azure AD PIM Role Assigned + # search: ' `azuread` operationName="Add eligible member to role in PIM completed*" + # | rename properties.* as * | rename targetResources{}.userPrincipalName as userPrincipalName + # | rename initiatedBy.user.userPrincipalName as initiatedBy | stats values(userPrincipalName) + # as userPrincipalName values(targetResources{}.displayName) as target_display_name + # by _time, result, operationName, initiatedBy | `azure_ad_pim_role_assigned_filter`' + # success: false + # tests: [] + # untested_detections: [] + # percent_complete: UKNOWN + # deprecated_detections: [] + # experimental_detections: [] # GENERAL CURIOSITY # TODO: how often do we actually encounter tests w/ an earliest/latest time defined? # TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long # cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be # testing on Correlation type detections anyway +# TODO: WinEvent Scheduled Task Created Within Public Path -> "1 validation error for +# CorrelationSearch notable_action -> rule_name none is not an allowed value +# (type=type_error.none.not_allowed)" class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None @@ -695,7 +724,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.FAIL.value, + FinalTestingStates.ERROR.value, start_time=test_start_time, set_pbar=False, ) diff --git a/contentctl/objects/risk_analysis_action.py b/contentctl/objects/risk_analysis_action.py index cdbb1674..7a5287b8 100644 --- a/contentctl/objects/risk_analysis_action.py +++ b/contentctl/objects/risk_analysis_action.py @@ -1,7 +1,7 @@ from typing import Any import json -from pydantic import BaseModel +from pydantic import BaseModel, validator from contentctl.objects.risk_object import RiskObject from contentctl.objects.threat_object import ThreatObject @@ -19,6 +19,33 @@ class RiskAnalysisAction(BaseModel): risk_objects: list[RiskObject] message: str + @validator("message", always=True, pre=True) + @classmethod + def _validate_message(cls, v, values) -> str: + """ + Validate that message is a non-empty string + """ + if v is None: + raise ValueError( + "RiskAnalysisAction.message is a required field, cannot be None. Check the " + "detection YAML definition to ensure a message is defined" + ) + + if not isinstance(v, str): + raise ValueError( + "RiskAnalysisAction.message must be a string. Check the detection YAML definition " + "to ensure message is defined as a string" + ) + + if len(v.strip()) < 1: + raise ValueError( + "RiskAnalysisAction.message must be a meaningful string, with a length greater than " + "or equal to 1 (once stripped of trailing/leading whitespace). 
Check the detection " + "YAML definition to ensure message is defined as a meaningful string" + ) + + return v + @classmethod def parse_from_dict(cls, dict_: dict[str, Any]) -> "RiskAnalysisAction": """ From bc8597c59aa80784c8249e0ad1c78c30a1ef970e Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 14 Dec 2023 15:15:05 -0800 Subject: [PATCH 20/22] TODOs, formatting, disabled debug logging in CorrelationSearch, expanded search window to 5y by default --- .../actions/detection_testing/GitService.py | 2 + .../DetectionTestingInfrastructure.py | 95 +++---------------- .../views/DetectionTestingView.py | 12 ++- contentctl/contentctl.py | 5 + .../detection_abstract.py | 6 +- contentctl/objects/base_test.py | 1 + contentctl/objects/base_test_result.py | 4 +- contentctl/objects/constants.py | 4 +- contentctl/objects/correlation_search.py | 20 +++- contentctl/objects/risk_analysis_action.py | 31 +++--- contentctl/objects/unit_test_result.py | 1 - 11 files changed, 74 insertions(+), 107 deletions(-) diff --git a/contentctl/actions/detection_testing/GitService.py b/contentctl/actions/detection_testing/GitService.py index 5f38f2dd..cad52fbb 100644 --- a/contentctl/actions/detection_testing/GitService.py +++ b/contentctl/actions/detection_testing/GitService.py @@ -82,6 +82,8 @@ def filter_detections_by_status(self, detections: list[Detection], #print() return [detection for detection in detections if DetectionStatus(detection.status) in statuses_to_test] + # TODO (cmcginley): consider listing Correlation type detections as skips rather than excluding + # them from results altogether? def filter_detections_by_type(self, detections: list[Detection], types_to_test: set[AnalyticsType] = {AnalyticsType.Anomaly, AnalyticsType.TTP, AnalyticsType.Hunting})->list[Detection]: #print("\n".join(sorted([f"{detection.file_path[92:]} - {detection.type}" for detection in detections if AnalyticsType(detection.type) not in types_to_test]))) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index ee4d919d..7afa34f3 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -46,85 +46,6 @@ TestingStates ) -# DO THE FOLLOWING BEFORE REVIEW -# TODO: Adjust extracto -# TODO: cleanup extra prints/logs -# TODO: cleanup noqas/ignores -# TODO: set correlation_search.ENABLE_LOGGING to False -# TODO: review TODOs -# LIST THE FOLLOWING AS PART OF THE REVIEW -# TODO: list SKIP, FAIL, PASS, ERROR conditions explicitly and how the relate to success (e.g. -# consider overall detections success vs overall pass) -# TODO: make it clear to Eric that test durations will no longer account for -# replay/cleanup in this new test grouping model (in the CLI reporting; the recorded -# test duration in the result file will include the setup/cleanup time in the duration -# for both unit and integration tests (effectively double counted)) -# LIST THE FOLLOWING AS (POSSIBLE) FUTURE WORK IN THE REVIEW -# TODO: update extractor to work w/ new fields in output -# TODO: add flag for enabling logging for correlation_search logging (?) -# TODO: add flag for changing max_sleep time for integration tests -# TODO: add stats around total test cases and unit/integration test sucess/failure? maybe configurable reporting? 
-# TODO: add setting to skip listing skips -> test_config.TestConfig, contentctl.test, contentctl.main -# TODO: right now, we are creating one CorrelationSearch instance for each test case; typically, there is only one -# unit test, and thus one integration test, per detection, so this is not an issue. However, if we start having many -# test cases per detection, we will be duplicating some effort & network calls that we don't need to. Consider -# refactoring in order to re-use CorrelationSearch objects across tests in such a case -# TODO: ideally, we could handle this and the following init w/ a call to model_post_init, so -# that all the logic is encapsulated w/in _parse_risk_and_notable_actions but that is a -# pydantic v2 feature (see the init validators for risk/notable actions): -# https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init -# TODO: evaluate sane defaults for timeframe for integration testing (e.g. 3y is good now, but -# maybe not always...); NOTE also that contentctl may already be munging timestamps for us -# TODO: make the search for risk/notable events a more specific query based on the search in -# question (and update the docstring to relfect when you do) -# TODO: break up the execute routines for integration/unit tests some more to remove code w/ -# similar structure -# TODO: skip docker and TA ext pull for custom infrastructure -# TODO: consider listing Correlation type detections as skips rather than excluding them from -# results altogether? -# TODO: look into why some pbar statuses seem to be overwriting each other (my status reporting and -# the completed/elapsed counter) -# TODO: look into getting better time elapsed updates on pbar statuses -# TODO: add flag around behavior to propagate unit failures to integration -# TODO: investigate 1-off (occasionally 2-off?) count in "Completed X/X"; noticed it when testing -# against multiple instances, but not against a single instance -# TODO: enforce distinct test names w/in detections -# TODO: add section to summary called "testwise_summary" listing per test metrics (e.g. total test, -# total tests passed, ...); also list num skipped at both detection and test level -# TODO: when in interactive mode, consider maybe making the cleanup routine in correlation_search -# happen after the user breaks the interactivity; currently risk/notable indexes are dumped -# before the user can inspect -# TODO: add more granular error messaging that can show success in finding a notable, but failure -# in finding a risk and vice-versa -# TODO: add logic which reports concretely that integration testing failed (or would fail) as a -# result of a missing victim observable -# TODO: Fix detection_abstract.tests_validate so that it surfaces validation errors (e.g. a lack of -# tests) to the final results, instead of looking like the following (maybe have a message -# propagated at the detection level? 
do a separate coverage check as part of validation?): -# - name: Azure AD PIM Role Assigned -# search: ' `azuread` operationName="Add eligible member to role in PIM completed*" -# | rename properties.* as * | rename targetResources{}.userPrincipalName as userPrincipalName -# | rename initiatedBy.user.userPrincipalName as initiatedBy | stats values(userPrincipalName) -# as userPrincipalName values(targetResources{}.displayName) as target_display_name -# by _time, result, operationName, initiatedBy | `azure_ad_pim_role_assigned_filter`' -# success: false -# tests: [] -# untested_detections: [] -# percent_complete: UKNOWN -# deprecated_detections: [] -# experimental_detections: [] - -# GENERAL CURIOSITY -# TODO: how often do we actually encounter tests w/ an earliest/latest time defined? -# TODO: why is the unit test duration so long on Risk Rule for Dev Sec Ops by Repository? big dataset? long -# cleanup/upload? retries in clearing the "stash" index? Maybe the stash index gets super big... we shouldn't be -# testing on Correlation type detections anyway -# TODO: WinEvent Scheduled Task Created Within Public Path -> "1 validation error for -# CorrelationSearch notable_action -> rule_name none is not an allowed value -# (type=type_error.none.not_allowed)" - class SetupTestGroupResults(BaseModel): exception: Union[Exception, None] = None @@ -854,6 +775,8 @@ def execute_unit_test( stdout.flush() test.result.duration = round(time.time() - test_start_time, 2) + # TODO (cmcginley): break up the execute routines for integration/unit tests some more to remove + # code w/ similar structure def execute_integration_test( self, detection: Detection, @@ -925,7 +848,10 @@ def execute_integration_test( # if the replay failed, record the test failure and return if not setup_results.success: test.result = IntegrationTestResult( - message="TEST FAILED (ERROR): something went wrong during during TestGroup setup (e.g. attack data replay)", + message=( + "TEST FAILED (ERROR): something went wrong during during TestGroup setup (e.g. " + "attack data replay)" + ), exception=setup_results.exception, duration=round(time.time() - test_start_time, 2), status=TestResultStatus.ERROR @@ -953,6 +879,12 @@ def execute_integration_test( start_time=test_start_time ) + # TODO (cmcginley): right now, we are creating one CorrelationSearch instance for each + # test case; typically, there is only one unit test, and thus one integration test, + # per detection, so this is not an issue. However, if we start having many test cases + # per detection, we will be duplicating some effort & network calls that we don't need + # to. Consider refactoring in order to re-use CorrelationSearch objects across tests + # in such a case # Instantiate the CorrelationSearch correlation_search = CorrelationSearch( detection_name=detection.name, @@ -970,6 +902,9 @@ def execute_integration_test( status=TestResultStatus.ERROR ) + # TODO (cmcginley): when in interactive mode, consider maybe making the cleanup routine in + # correlation_search happen after the user breaks the interactivity; currently + # risk/notable indexes are dumped before the user can inspect # Pause here if the terminate flag has NOT been set AND either of the below are true: # 1. the behavior is always_pause # 2. 
the behavior is pause_on_failure and the test failed diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index 8c6f5ac0..16f0a71b 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -114,8 +114,12 @@ def getSummaryObject( untested_detections.sort(key=lambda x: x["name"]) # Get lists of detections (name only) that were skipped due to their status (experimental or deprecated) - experimental_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.experimental.value]) # noqa: E501 - deprecated_detections = sorted([detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.deprecated.value]) # noqa: E501 + experimental_detections = sorted([ + detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.experimental.value + ]) + deprecated_detections = sorted([ + detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.deprecated.value + ]) # If any detection failed, the overall success is False if (total_fail + len(untested_detections)) == 0: @@ -134,6 +138,10 @@ def getSummaryObject( total_pass, total_detections, 1 ) + # TODO (cmcginley): add stats around total test cases and unit/integration test + # sucess/failure? maybe configurable reporting? add section to summary called + # "testwise_summary" listing per test metrics (e.g. total test, total tests passed, ...); + # also list num skipped at both detection and test level # Construct and return the larger results dict result_dict = { "summary": { diff --git a/contentctl/contentctl.py b/contentctl/contentctl.py index ecfe3feb..b4954af3 100644 --- a/contentctl/contentctl.py +++ b/contentctl/contentctl.py @@ -593,6 +593,11 @@ def main(): help="Whether integration testing should be enabled, in addition to unit testing (requires a configured Splunk " "instance with ES installed)" ) + # TODO (cmcginley): add flag for enabling logging for correlation_search logging + # TODO (cmcginley): add flag for changing max_sleep time for integration tests + # TODO (cmcginley): add setting to skip listing skips -> test_config.TestConfig, + # contentctl.test, contentctl.main + test_parser.set_defaults(func=test) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index d7a6b9a3..3ab5bf65 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -167,6 +167,10 @@ def search_obsersables_exist_validate(cls, v, values): # Found everything return v + # TODO (cmcginley): Fix detection_abstract.tests_validate so that it surfaces validation errors + # (e.g. a lack of tests) to the final results, instead of just showing a failed detection w/ + # no tests (maybe have a message propagated at the detection level? 
do a separate coverage + # check as part of validation?): @validator("tests") def tests_validate(cls, v, values): if values.get("status", "") == DetectionStatus.production.value and not v: @@ -188,7 +192,7 @@ def all_tests_successful(self) -> bool: failure; if any test fails (FAIL, ERROR), consider that a failure; if any test has no result or no status, consider that a failure. If all tests succeed (PASS, SKIP), consider the detection a success - :returns: bool where True indicates all tests succeeded (they existed, complete and were + :returns: bool where True indicates all tests succeeded (they existed, complete and were PASS/SKIP) """ # If no tests are defined, we consider it a failure for the detection diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index 06a6ed1a..c5ae1e02 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -18,6 +18,7 @@ def __str__(self) -> str: return self.value +# TODO (cmcginley): enforce distinct test names w/in detections class BaseTest(BaseModel, ABC): """ A test case for a detection diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index e968d6df..a88b44c8 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -26,6 +26,8 @@ def __str__(self) -> str: return self.value +# TODO (cmcginley): add validator to BaseTestResult which makes a lack of exception incompatible +# with status ERROR class BaseTestResult(BaseModel): """ Base class for test results @@ -54,8 +56,6 @@ class Config: # Needed to allow for embedding of Exceptions in the model arbitrary_types_allowed = True - # TODO: add validator which makes a lack of exception incompatible with status ERROR - @property def passed(self) -> bool: """ diff --git a/contentctl/objects/constants.py b/contentctl/objects/constants.py index 288d32b4..47a5609b 100644 --- a/contentctl/objects/constants.py +++ b/contentctl/objects/constants.py @@ -8,7 +8,7 @@ "Privilege Escalation": "Exploitation", "Defense Evasion": "Exploitation", "Credential Access": "Exploitation", - "Discovery": "Exploitation", + "Discovery": "Exploitation", "Lateral Movement": "Exploitation", "Collection": "Exploitation", "Command And Control": "Command And Control", @@ -132,4 +132,4 @@ "Command_and_Control": "TA0011", "Exfiltration": "TA0010", "Impact": "TA0040" -} \ No newline at end of file +} diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 626371ab..9950636b 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -21,7 +21,7 @@ # Suppress logging by default; enable for local testing -ENABLE_LOGGING = True +ENABLE_LOGGING = False LOG_LEVEL = logging.DEBUG LOG_PATH = "correlation_search.log" @@ -112,11 +112,13 @@ class TimeoutConfig(int, Enum): MAX_SLEEP = 210 +# TODO (cmcginley): evaluate sane defaults for timeframe for integration testing (e.g. 5y is good +# now, but maybe not always...); maybe set latest/earliest to None? 
 class ScheduleConfig(str, Enum):
     """
     Configuraton values for the saved search schedule
     """
-    EARLIEST_TIME: str = "-3y@y"
+    EARLIEST_TIME: str = "-5y@y"
     LATEST_TIME: str = "-1m@m"
     CRON_SCHEDULE: str = "*/1 * * * *"
 
@@ -408,6 +410,10 @@ def _get_notable_action(content: dict[str, Any]) -> Optional[NotableAction]:
             return NotableAction.parse_from_dict(content)
         return None
 
+    # TODO (cmcginley): ideally, we could handle this and the following init w/ a call to
+    # model_post_init, so that all the logic is encapsulated w/in _parse_risk_and_notable_actions,
+    # but that is a pydantic v2 feature (see the init validators for risk/notable actions):
+    # https://docs.pydantic.dev/latest/api/base_model/#pydantic.main.BaseModel.model_post_init
     def _parse_risk_and_notable_actions(self) -> None:
         """Parses the risk/notable metadata we care about from self.saved_search.content
 
@@ -482,9 +488,9 @@ def update_timeframe(
         data it runs on is no older than the given earliest time and no newer than the given
         latest time; optionally calls self.refresh() (optional, because in some situations the
         caller may want to handle calling refresh, to avoid repeated network operations).
-        :param earliest_time: the max age of data for the search to run on (default: "-3y@y")
-        :param earliest_time: the max age of data for the search to run on (default: "-3y@y")
-        :param cron_schedule: the cron schedule for the search to run on (default: "*/1 * * * *")
+        :param earliest_time: the max age of data for the search to run on (default: see ScheduleConfig)
+        :param latest_time: the min age of data for the search to run on (default: see ScheduleConfig)
+        :param cron_schedule: the cron schedule for the search to run on (default: see ScheduleConfig)
         :param refresh: a bool indicating whether to run refresh after enabling
         """
         # update the SavedSearch accordingly
@@ -519,6 +525,8 @@ def force_run(self, refresh=True) -> None:
         if refresh:
             self.refresh()
 
+    # TODO (cmcginley): make the search for risk/notable events a more specific query based on the
+    # search in question (and update the docstring to reflect when you do)
     def risk_event_exists(self) -> bool:
         """Whether a risk event exists
 
@@ -651,6 +659,8 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo
             # reset the result to None on each loop iteration
             result = None
 
+            # TODO (cmcginley): add more granular error messaging that can show success in
+            # finding a notable, but failure in finding a risk and vice-versa
             # check for risk events
             self.logger.debug("Checking for matching risk events")
             if self.has_risk_analysis_action:
diff --git a/contentctl/objects/risk_analysis_action.py b/contentctl/objects/risk_analysis_action.py
index 7a5287b8..46289ee6 100644
--- a/contentctl/objects/risk_analysis_action.py
+++ b/contentctl/objects/risk_analysis_action.py
@@ -7,14 +7,16 @@
 from contentctl.objects.threat_object import ThreatObject
 
 
+# TODO (cmcginley): add logic which reports concretely that integration testing failed (or would fail)
+# as a result of a missing victim observable
 class RiskAnalysisAction(BaseModel):
     """Representation of a risk analysis action
 
-    NOTE: this is NOT a representation of risk event generated as a result of detection firing, but rather of the
-    Adaptive Response Action that generates the risk event
+    NOTE: this is NOT a representation of a risk event generated as a result of detection firing,
+    but rather of the Adaptive Response Action that generates the risk event
     :param risk_objects: a list of RiskObject instances for this action
-    :param message: the message associated w/ the risk event (NOTE: may contain macros of the form $...$ which
-        should be replaced with real values in the resulting risk events)
+    :param message: the message associated w/ the risk event (NOTE: may contain macros of the form
+        $...$ which should be replaced with real values in the resulting risk events)
     """
     risk_objects: list[RiskObject]
     message: str
@@ -51,30 +53,31 @@ def parse_from_dict(cls, dict_: dict[str, Any]) -> "RiskAnalysisAction":
         """
         Given a dictionary of values, parses out specific keys to construct one or more instances.
 
-        The risk action has 1 or more associates risk objects (e.g. user, system, etc.). These are stored as a list of
-        dicts inside the 'action.risk.param._risk' field, which we need to unpack the JSON of.
+        The risk action has 1 or more associated risk objects (e.g. user, system, etc.). These are
+        stored as a list of dicts inside the 'action.risk.param._risk' field, which we need to
+        unpack the JSON of.
         :param dict_: a dictionary of with the expected keys
         :return: and instance of RiskAnalysisAction
         :raises KeyError: if the dictionary given does not contain a required key
-        :raises json.JSONDecodeError: if the value at dict_['action.risk.param._risk'] can't be decoded from JSON into
-            a dict
-        :raises ValueError: if the value at dict_['action.risk.param._risk'] is unpacked to be anything
-            other than a singleton or if an unexpected field is enoucountered within the singleton
+        :raises json.JSONDecodeError: if the value at dict_['action.risk.param._risk'] can't be
+            decoded from JSON into a dict
+        :raises ValueError: if the value at dict_['action.risk.param._risk'] is unpacked to be
+            anything other than a singleton or if an unexpected field is encountered within the
+            singleton
         :return: a RiskAnalysisAction
         """
         object_dicts = json.loads(dict_["action.risk.param._risk"])
 
         if len(object_dicts) < 1:
             raise ValueError(
-                f"Risk analysis action has no objects (threat nor risk) defined in 'action.risk.param._risk': "
-                f"{dict_['action.risk.param._risk']}"
+                f"Risk analysis action has no objects (threat nor risk) defined in "
+                f"'action.risk.param._risk': {dict_['action.risk.param._risk']}"
             )
 
         risk_objects: list[RiskObject] = []
         threat_objects: list[ThreatObject] = []
-        # TODO: should we raise an error if we have only threat objects and no risk objects? Scoring is only a notion
-        # for risk objects as far as I'm aware
+        # TODO: add validation ensuring at least 1 risk object
         for entry in object_dicts:
             if "risk_object_field" in entry:
                 risk_objects.append(RiskObject(
diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py
index a87e12e3..9467dd78 100644
--- a/contentctl/objects/unit_test_result.py
+++ b/contentctl/objects/unit_test_result.py
@@ -61,7 +61,6 @@ def set_job_content(
                 sid=content.get("sid", None),
             )
 
-        # TODO: this error message seems not the most helpful, since content must be None for it to be set
         elif content is None:
             self.job_content = None
             self.status = TestResultStatus.ERROR

From ed12b4af79f6ae4801916db02313b37f0ffc7eff Mon Sep 17 00:00:00 2001
From: Casey McGinley
Date: Thu, 14 Dec 2023 15:29:57 -0800
Subject: [PATCH 21/22] final todo

---
 contentctl/objects/risk_analysis_action.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contentctl/objects/risk_analysis_action.py b/contentctl/objects/risk_analysis_action.py
index 46289ee6..207f3653 100644
--- a/contentctl/objects/risk_analysis_action.py
+++ b/contentctl/objects/risk_analysis_action.py
@@ -77,7 +77,7 @@ def parse_from_dict(cls, dict_: dict[str, Any]) -> "RiskAnalysisAction":
 
         risk_objects: list[RiskObject] = []
         threat_objects: list[ThreatObject] = []
-        # TODO: add validation ensuring at least 1 risk object
+        # TODO (cmcginley): add validation ensuring at least 1 risk object
         for entry in object_dicts:
             if "risk_object_field" in entry:
                 risk_objects.append(RiskObject(

From 1c6c7f6e69866ff35afb7c2f7cf7f454fb0ef0d2 Mon Sep 17 00:00:00 2001
From: pyth0n1c
Date: Thu, 15 Feb 2024 14:13:08 -0800
Subject: [PATCH 22/22] Bump release to 3.1.0 due to significant changes.

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 416ff0df..7cbbab7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "contentctl"
-version = "3.0.2"
+version = "3.1.0"
 description = "Splunk Content Control Tool"
 authors = ["STRT "]
 license = "Apache 2.0"
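
A note on the base_test_result.py TODO above (a missing exception should be incompatible with status ERROR): a minimal sketch of such a validator could look like the following. It assumes the pydantic v1-style validators used elsewhere in this series; the model, enum, and field names below are illustrative stand-ins rather than the actual contentctl definitions.

from enum import Enum
from typing import Optional

from pydantic import BaseModel, validator


class TestResultStatus(str, Enum):
    PASS = "pass"
    FAIL = "fail"
    SKIP = "skip"
    ERROR = "error"


class ResultSketch(BaseModel):
    # 'status' is declared before 'exception' on purpose: pydantic v1 validates fields
    # in declaration order, so 'status' is available in 'values' when 'exception' runs
    status: Optional[TestResultStatus] = None
    exception: Optional[Exception] = None

    class Config:
        # Exception instances are not pydantic-native types
        arbitrary_types_allowed = True

    @validator("exception", always=True)
    def error_status_requires_exception(cls, v, values):
        # An ERROR result with no attached exception is rejected at construction time
        if values.get("status") == TestResultStatus.ERROR and v is None:
            raise ValueError("a result with status ERROR must carry an exception")
        return v

With a validator like this, building a result with status ERROR and no exception raises a ValidationError when the model is constructed, instead of surfacing later during reporting.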
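
The success criteria spelled out in the all_tests_successful docstring (PASS and SKIP count as success; FAIL, ERROR, a missing result, or a missing status count as failure; a detection with no tests at all also counts as failure) reduce to a small helper along these lines. This is an illustrative sketch only, not the detection_abstract.py implementation; a None entry stands in for a test with no result or no status.

from enum import Enum
from typing import Optional


class TestResultStatus(str, Enum):
    # Illustrative status values mirroring the ones referenced in the diff above
    PASS = "pass"
    FAIL = "fail"
    SKIP = "skip"
    ERROR = "error"


def all_tests_successful(statuses: list[Optional[TestResultStatus]]) -> bool:
    """Return True only if tests exist and every one of them completed as PASS or SKIP."""
    # No tests defined is treated as a failure for the detection
    if len(statuses) == 0:
        return False
    # A missing status (None), FAIL, or ERROR fails the whole detection
    return all(
        status in (TestResultStatus.PASS, TestResultStatus.SKIP) for status in statuses
    )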
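
For the risk_analysis_action.py TODO about requiring at least one risk object, the parsing step could fold that check in roughly as below. This only sketches the shape of the logic visible in the diff (unpack the JSON stored in 'action.risk.param._risk' and split entries on the presence of 'risk_object_field'); plain dicts stand in for the RiskObject and ThreatObject classes, and the function name is invented for illustration.

import json
from typing import Any


def split_risk_param(dict_: dict[str, Any]) -> tuple[list[dict], list[dict]]:
    """Unpack 'action.risk.param._risk' and split entries into risk vs. threat objects."""
    object_dicts = json.loads(dict_["action.risk.param._risk"])

    risk_objects: list[dict] = []
    threat_objects: list[dict] = []
    for entry in object_dicts:
        # Entries describing a risk object carry a 'risk_object_field' key; anything
        # else is treated as a threat object in this sketch
        if "risk_object_field" in entry:
            risk_objects.append(entry)
        else:
            threat_objects.append(entry)

    # The validation the TODO asks for: scoring is only a notion for risk objects,
    # so require at least one of them
    if len(risk_objects) < 1:
        raise ValueError(
            "Risk analysis action defines no risk objects in 'action.risk.param._risk'"
        )
    return risk_objects, threat_objects

Raising at parse time keeps the rule from the removed TODO (scoring only applies to risk objects) enforced as soon as the action is read, rather than leaving an empty risk object list to fail later in integration testing.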