diff --git a/scripts/header-check.sh b/scripts/header-check.sh
index 22a1847fc..b9c7d2325 100755
--- a/scripts/header-check.sh
+++ b/scripts/header-check.sh
@@ -26,9 +26,10 @@ EXCLUDE_PATTERNS=(
     "core/src/main/resources/*"
     "core/src/test/resources/*"
     "user_tools/src/spark_rapids_pytools/resources/*"
-    "user_tools/docs/resources/*"
+    "user_tools/docs/*"
     "user_tools/tests/spark_rapids_tools_ut/resources/*"
     "*.csv"
+    "*.zstd"
 )
 
 # Create the grep exclude options (--exclude=*csv --exclude=core/src/test/resources/*)
diff --git a/user_tools/docs/resources/debug-behave-intellij.png b/user_tools/docs/resources/debug-behave-intellij.png
new file mode 100644
index 000000000..a21c69587
Binary files /dev/null and b/user_tools/docs/resources/debug-behave-intellij.png differ
diff --git a/user_tools/docs/tools_e2e_tests.md b/user_tools/docs/tools_e2e_tests.md
new file mode 100644
index 000000000..6cd9a4d93
--- /dev/null
+++ b/user_tools/docs/tools_e2e_tests.md
@@ -0,0 +1,172 @@
+# Spark Rapids Tools End-to-End Behavior Tests
+
+This document outlines the end-to-end tests for Spark Rapids tools, designed to cover scenarios such as missing
+dependencies, handling different types of event logs, and interacting with HDFS.
+
+## Directory Structure
+```commandline
+user_tools/tests/spark_rapids_tools_e2e/
+├── features               # Contains test scenarios and environment setup.
+│   ├── environment.py     # Setup and teardown procedures for the tests.
+│   ├── steps              # Step definitions for the tests.
+│   └── *.feature          # Feature files defining test scenarios.
+└── resources              # Resources used in the tests.
+    ├── event_logs
+    └── scripts            # Scripts used in the tests.
+```
+Configurations for `behave` tests are defined in the `user_tools/tox.ini` file.
+
+## Setup
+
+From the `/user_tools` directory, run the following command to install the required dependencies:
+
+```sh
+pip install behave
+# or
+pip install .[test]
+```
+
+## Running Tests
+Tests can be run with the `behave` command directly or through `tox`.
+
+**Basic Usage:**
+
+```sh
+behave <options>
+# or
+tox -e behave -- <options>
+```
+
+**Run All Tests:**
+
+```sh
+behave
+# or
+tox -e behave
+```
+
+### Common Options
+
+**Run Specific Tests by Tag**
+
+See the [Tags Format](#tags-format) section for more information on tags.
+
+```sh
+behave --tags <tag>
+# or
+tox -e behave -- --tags <tag>
+```
+
+**Run Specific Tests by Name**
+
+```sh
+behave --name <test_name>
+# or
+tox -e behave -- --name <test_name>
+```
+
+**Skip Tests by Tag**
+
+```sh
+behave --tags ~<tag>
+# or
+tox -e behave -- --tags ~<tag>
+```
+
+**Custom Arguments**
+- Custom arguments can be passed to the behave tests using the `-D` flag.
+- Example: Skip building the Tools jar during setup.
+
+```sh
+behave -D build_jar=false  # Skip building the Tools jar during setup (default: true)
+# or
+tox -e behave -- -D build_jar=false
+```
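+
+For illustration, a minimal sketch (not a complete hook) of how a `-D` value surfaces
+inside the behave hooks — defaults come from the `[behave.userdata]` section in
+`tox.ini`, and `-D key=value` on the command line overrides them:
+
+```python
+# Illustrative sketch only; mirrors how environment.py consumes these flags.
+def before_all(context):
+    # '-D build_jar=false' arrives as the string 'false'; the default comes
+    # from [behave.userdata] in the behave config.
+    build_jar = context.config.userdata.get('build_jar', 'true')
+    if build_jar.lower() in ('true', '1', 'yes'):
+        print('Tools JAR will be built during setup')
+```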
+
+**Verbose Mode**
+- When verbose mode is enabled, `STDOUT` and `STDERR` from all subprocesses executed during the test run are shown in the console.
+```sh
+behave -v
+# or
+tox -e behave -- -v
+```
+
+## Notes
+
+### Tags Format
+Tags are used to uniquely identify test scenarios and are defined in the following format: `@test_id_<feature_acronym>_<00xx>`.
+- `<feature_acronym>`: Acronym for the feature file being tested. Examples:
+  - `ELP` for `event_log_processing.feature`
+  - `IC` for `installation_checks.feature`
+- `<00xx>`: Unique 4-digit identifier for the test scenario. Examples: `0001`, `0002`.
+
+Tags Example: `@test_id_ELP_0001`, `@test_id_IC_0002`.
+
+### Built-in Setup Steps
+
+The tests include the following setup steps:
+
+1. Build Spark Rapids Tools JAR:
+   - By default, the JAR is built before running the tests.
+   - To skip this step (e.g., if the JAR is already built), use the argument `-D build_jar=false`.
+2. Build the Python Package.
+
+The test warns the user that this initial setup may take a few minutes.
+
+### Built-in HDFS Cluster Setup
+
+- Some of the tests include configuring a local HDFS cluster. Step: `HDFS is "{status}"`
+- This step downloads Hadoop binaries and sets up the cluster.
+  - The download occurs only once per machine, but the cluster is set up for each test run.
+  - The download step may take a few minutes.
+- Tests involving HDFS are tagged with `@long_running` and can be skipped using `--tags ~@long_running`.
+
+#### HDFS Configuration
+- Replication factor: 1
+- Disk Space Quota: 2GB
+- Temp Directory: `/tmp/spark_rapids_tools_e2e_tests`
+  - The temp directory can be changed using the argument `-D e2e_tests_tmp_dir=<dir>` during the test run.
+
+#### Cleanup
+- The step `HDFS is "{status}"` sets an after-scenario hook to stop the HDFS cluster and remove the temporary directories.
+- It does not clean up the Hadoop binaries downloaded during the setup.
+- Cleanup can be done manually using the script below:
+```sh
+/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
+```
+
+## Debugging Tests in IDE
+
+- Ensure the Python interpreter is set to the correct virtual environment and `JAVA_HOME` is set.
+
+**IntelliJ**
+- Add a Python run configuration with module name: `behave` and working directory: `/user_tools`.
+- Add required arguments in the `Script parameters` field.
+
+Sample Run Configuration:
+![Debugging behave tests in IntelliJ](resources/debug-behave-intellij.png)
+
+**VS Code**
+- Open or create the `.vscode/launch.json` file. Add the following configuration with required arguments:
+```json
+{
+  "configurations": [
+    {
+      "name": "Python: Spark Rapids Tools E2E Tests",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "behave",
+      "args": [],
+      "python": "${command:python.interpreterPath}",
+      "cwd": "${workspaceFolder}/user_tools"
+    }
+  ]
+}
+```
+
+## Guidelines for Writing Tests
+
+TODO: Add guidelines and conventions for writing tests.
diff --git a/user_tools/pyproject.toml b/user_tools/pyproject.toml
index b4fdf1ed7..39d46ebfe 100644
--- a/user_tools/pyproject.toml
+++ b/user_tools/pyproject.toml
@@ -76,7 +76,7 @@ version = {attr = "spark_rapids_pytools.__version__"}
 repository = "https://github.com/NVIDIA/spark-rapids-tools/tree/main"
 
 [project.optional-dependencies]
 test = [
-    "tox", 'pytest', 'cli_test_helpers'
+    "tox", 'pytest', 'cli_test_helpers', 'behave'
 ]
 qualx = [
     "holoviews",
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/environment.py b/user_tools/tests/spark_rapids_tools_e2e/features/environment.py
new file mode 100644
index 000000000..228f3e2dd
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/environment.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines environment setup and teardown functions for the end-to-end tests using behave.
+"""
+
+import os
+import shutil
+import tempfile
+
+from spark_rapids_tools.utils import Utilities
+from steps.e2e_utils import E2ETestUtils
+
+""" Define behave hooks for the tests. These hooks are automatically called by behave. """
+
+logger = E2ETestUtils.get_logger()
+
+
+def before_all(context) -> None:
+    """
+    Set up the environment for the tests. This function is automatically called before all the tests.
+    """
+    context.temp_dir = tempfile.mkdtemp()
+    _set_environment_variables(context)
+    _set_verbose_mode(context)
+    _setup_env(context)
+
+
+def after_all(context) -> None:
+    """
+    Clean up the environment after the tests. This function is automatically called after all the tests.
+    """
+    _clear_environment_variables()
+    shutil.rmtree(context.temp_dir)
+
+
+def before_scenario(context, scenario) -> None:
+    if "skip" in scenario.effective_tags:
+        scenario.skip("Marked with @skip")
+        return
+
+
+def after_scenario(context, scenario) -> None:
+    """
+    Clean up the environment after each scenario. This function is automatically called after each scenario.
+    Steps must set the callback function using set_after_scenario_fn() to perform any cleanup.
+    """
+    if hasattr(context, 'after_scenario_fn'):
+        context.after_scenario_fn()
+
+
+def _set_verbose_mode(context) -> None:
+    verbose_enabled = getattr(context.config, 'verbose', False)
+    if verbose_enabled:
+        context.config.stdout_capture = False
+        context.config.stderr_capture = False
+    os.environ['E2E_TEST_VERBOSE_MODE'] = str(verbose_enabled).lower()
+
+
+def _set_environment_variables(context) -> None:
+    """
+    Set environment variables needed for the virtual environment setup.
+    """
+    tools_version = Utilities.get_base_release()
+    scala_version = context.config.userdata.get('scala_version')
+    venv_name = context.config.userdata.get('venv_name')
+    jar_filename = f'rapids-4-spark-tools_{scala_version}-{tools_version}-SNAPSHOT.jar'
+    build_jar_value = context.config.userdata.get('build_jar')
+    build_jar = build_jar_value.lower() in ['true', '1', 'yes']
+
+    os.environ['E2E_TEST_TOOLS_DIR'] = E2ETestUtils.get_tools_root_path()
+    os.environ['E2E_TEST_SCRIPTS_DIR'] = os.path.join(E2ETestUtils.get_e2e_tests_resource_path(), 'scripts')
+    os.environ['E2E_TEST_TOOLS_JAR_PATH'] = os.path.join(os.environ['E2E_TEST_TOOLS_DIR'],
+                                                         f'core/target/{jar_filename}')
+    os.environ['E2E_TEST_VENV_DIR'] = os.path.join(context.temp_dir, venv_name)
+    os.environ['E2E_TEST_BUILD_JAR'] = 'true' if build_jar else 'false'
+    os.environ['E2E_TEST_SPARK_BUILD_VERSION'] = context.config.userdata.get('buildver')
+    os.environ['E2E_TEST_HADOOP_VERSION'] = context.config.userdata.get('hadoop.version')
+    os.environ['E2E_TEST_TMP_DIR'] = context.config.userdata.get('e2e_tests_tmp_dir')
+
+
+def _setup_env(context) -> None:
+    """
+    Build the JAR and set up the virtual environment for the tests.
+    """
+    script_file_name = context.config.userdata.get('setup_script_file')
+    script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], script_file_name)
+    try:
+        warning_msg = "Setting up the virtual environment for the tests. This may take a while."
+        if os.environ.get('E2E_TEST_BUILD_JAR') == 'true':
+            warning_msg = f'Building JAR and {warning_msg}'
+        logger.warning(warning_msg)
+        result = E2ETestUtils.run_sys_cmd([script])
+        E2ETestUtils.assert_sys_cmd_return_code(result,
+                                                exp_return_code=0,
+                                                error_msg="Failed to create virtual environment")
+    except Exception as e:  # pylint: disable=broad-except
+        raise RuntimeError(f"Failed to create virtual environment. Reason: {str(e)}") from e
+
+
+def _clear_environment_variables() -> None:
+    """
+    Clear environment variables set for the virtual environment setup.
+    """
+    env_vars = ['E2E_TEST_SCRIPTS_DIR', 'E2E_TEST_VENV_DIR', 'E2E_TEST_TOOLS_JAR_PATH']
+    for key in env_vars:
+        os.environ.pop(key, None)
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature b/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature
new file mode 100644
index 000000000..6a3f97cb0
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Feature: Event Log Processing
+
+  @test_id_ELP_0001
+  Scenario Outline: Tool spark_rapids runs with different types of event logs
+    When spark-rapids tool is executed with "<event_logs>" eventlogs
+    Then stderr contains the following
+      """
+      <expected_stderr>
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | event_logs                         | expected_stderr                                                                                                        |
+      | invalid_path_eventlog              | process.failure.count = 1;invalid_path_eventlog not found, skipping!                                                   |
+      | gpu_eventlog.zstd                  | process.skipped.count = 1;GpuEventLogException: Cannot parse event logs from GPU run: skipping this file               |
+      | photon_eventlog.zstd               | process.skipped.count = 1;PhotonEventLogException: Encountered Databricks Photon event log: skipping this file!        |
+      | streaming_eventlog.zstd            | process.skipped.count = 1;StreamingEventLogException: Encountered Spark Structured Streaming Job: skipping this file!  |
+      | incorrect_app_status_eventlog.zstd | process.NA.count = 1;IncorrectAppStatusException: Application status is incorrect. Missing AppInfo                     |
+
+  @test_id_ELP_0002
+  Scenario: Qualification tool JAR crashes
+    Given thread to crash qualification tool has started
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stderr contains the following
+      """
+      Qualification. Raised an error in phase [Execution]
+      """
+    And return code is "1"
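Note that the `expected_stderr` column above packs several expected substrings into a
single cell separated by `;`; the `Then stderr contains the following` step (defined in
`test_steps.py` later in this patch) splits on `;` and asserts each fragment
independently. A minimal sketch of that convention (the sample output shape is
illustrative):

```python
# Each ';'-separated fragment must appear somewhere in the captured stderr.
captured_stderr = (
    'process.failure.count = 1\n'
    '... invalid_path_eventlog not found, skipping! ...\n'
)
expected = 'process.failure.count = 1;invalid_path_eventlog not found, skipping!'
for fragment in expected.split(';'):
    assert fragment in captured_stderr, f'missing stderr fragment: {fragment}'
```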
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature b/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature
new file mode 100644
index 000000000..714ddfc2b
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature
@@ -0,0 +1,88 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@long_running
+Feature: HDFS Event Log Storage
+
+  @test_id_HDFS_0001
+  Scenario Outline: Eventlogs are stored in HDFS - Platform specified
+    Given platform is "<platform>"
+    And HDFS is "running"
+    And HDFS has "<eventlog>" eventlogs
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | platform | eventlog                       |
+      | onprem   | join_agg_on_yarn_eventlog.zstd |
+      | dataproc | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0002
+  Scenario Outline: Eventlogs are stored in HDFS - Platform not specified
+    Given HDFS is "running"
+    And HDFS has "<eventlog>" eventlogs
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0003
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS installed but not running
+    Given HDFS is "not running"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0004
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS not installed, Platform specified
+    Given platform is "onprem"
+    And HDFS is "not installed"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!;Incomplete HDFS URI
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0005
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS not installed, Platform not specified
+    Given HDFS is "not installed"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!;Incomplete HDFS URI
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
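The quoted statuses in the `HDFS is "<status>"` steps above ("running", "not running",
"not installed") are mapped onto an enum by the step implementation;
`HdfsStatus.fromstring()` (defined in `e2e_utils.py` later in this patch) converts
spaces to underscores before the lookup. A usage sketch:

```python
# Space-separated status strings from the feature files resolve to enum members.
HdfsStatus.fromstring('not running')    # -> HdfsStatus.NOT_RUNNING
HdfsStatus.fromstring('not installed')  # -> HdfsStatus.NOT_INSTALLED
```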
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature b/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature
new file mode 100644
index 000000000..3edce082f
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature
@@ -0,0 +1,44 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Feature: Tool Installation Checks
+
+  @test_id_IC_0001
+  Scenario Outline: Environment has missing CLI and spark_rapids tool processes eventlogs
+    Given platform is "<platform>"
+    And "<cli>" is not installed
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stdout contains the following
+      """
+      <expected_stdout>
+      """
+    And processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | platform         | cli    | expected_stdout                |
+      | dataproc         | gcloud | 2 x n1-standard-16 (4 T4 each) |
+      | emr              | aws    | 10 x g5.xlarge                 |
+      | databricks-aws   | aws    | 10 x g5.xlarge                 |
+      | databricks-azure | az     | 2 x Standard_NC64as_T4_v3      |
+
+  @test_id_IC_0002
+  Scenario: Environment has missing java
+    Given "java" is not installed
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stderr contains the following
+      """
+      RuntimeError: Error invoking CMD
+      """
+    And return code is "1"
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py b/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py
new file mode 100644
index 000000000..4468c0050
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py
@@ -0,0 +1,225 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines utility functions used by the end-to-end tests using behave.
+"""
+
+import logging
+import os
+import subprocess
+from dataclasses import dataclass
+from enum import auto
+from pathlib import Path
+from typing import List
+from urllib.parse import urlparse
+
+from spark_rapids_tools import EnumeratedType
+
+
+@dataclass
+class E2ETestUtils:
+
+    @staticmethod
+    def get_cmd_output_str(cmd_result: subprocess.CompletedProcess) -> str:
+        """
+        Get the output of a command as a string (stdout and stderr).
+        """
+        dash_line = '-' * 50
+        cmd_sections = [("COMMAND", ' '.join(cmd_result.args)),
+                        ("STDOUT", cmd_result.stdout),
+                        ("STDERR", cmd_result.stderr)]
+        output_sections = []
+        for label, content in cmd_sections:
+            content = content.strip() if content else "No output"
+            output_sections.append(f"{dash_line}\n{label}\n{dash_line}\n{content}\n{dash_line}")
+        return '\n'.join(output_sections).strip() + '\n'
+
+    @classmethod
+    def run_sys_cmd(cls, cmd: list) -> subprocess.CompletedProcess:
+        """
+        Run a system command and return the result.
+        If verbose mode is enabled in the behave config, print the command and its output.
+        """
+        cmd_result = subprocess.run(cmd, capture_output=True, text=True)
+        if cls.is_verbose_mode():
+            print(cls.get_cmd_output_str(cmd_result))
+        return cmd_result
+
+    @classmethod
+    def assert_sys_cmd_return_code(cls,
+                                   cmd_result: subprocess.CompletedProcess,
+                                   exp_return_code: int = 0,
+                                   error_msg: str = None) -> None:
+        assert cmd_result.returncode == exp_return_code, \
+            f"{error_msg}\n{cls.get_cmd_output_str(cmd_result)}"
+
+    @classmethod
+    def create_spark_rapids_cmd(cls,
+                                event_logs: List[str],
+                                output_dir: str,
+                                platform: str = 'onprem',
+                                filter_apps: str = 'all') -> List[str]:
+        """
+        Create the command to run the Spark Rapids qualification tool.
+        TODO: We can add more options to the command as needed.
+        """
+        return [
+            cls.get_spark_rapids_cli(),
+            'qualification',
+            '--platform', platform,
+            '--eventlogs', ','.join(event_logs),
+            '-o', output_dir,
+            '--tools_jar', cls.get_tools_jar_file(),
+            '--verbose',
+            '--filter_apps', filter_apps
+        ]
+
+    # Utility getter functions
+    @staticmethod
+    def get_tools_root_path() -> str:
+        return str(Path(__file__).parents[5])
+
+    @staticmethod
+    def get_e2e_tests_root_path() -> str:
+        return str(Path(__file__).parents[2])
+
+    @classmethod
+    def get_e2e_tests_config_file(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_root_path(), 'behave.ini')
+
+    @classmethod
+    def get_e2e_tests_resource_path(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_root_path(), 'resources')
+
+    @classmethod
+    def get_local_event_logs_dir(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_resource_path(), 'event_logs')
+
+    @staticmethod
+    def get_spark_rapids_cli() -> str:
+        return os.path.join(os.environ['E2E_TEST_VENV_DIR'], 'bin', 'spark_rapids')
+
+    @staticmethod
+    def get_tools_jar_file() -> str:
+        return os.environ['E2E_TEST_TOOLS_JAR_PATH']
+
+    @staticmethod
+    def is_verbose_mode() -> bool:
+        return os.environ['E2E_TEST_VERBOSE_MODE'].lower() == 'true'
+
+    @classmethod
+    def resolve_event_logs(cls, event_logs: List[str]) -> List[str]:
+        """
+        Get the full path of the event logs if they are local files.
+        """
+        # Base directory can be modified (e.g. separate for local and CICD runs).
+        # Bare file names resolve under resources/event_logs; URIs with a scheme
+        # (e.g. hdfs:///) are returned unchanged.
+        fs = urlparse(event_logs[0]).scheme
+        if not fs or fs == 'file':
+            event_logs_dir = cls.get_local_event_logs_dir()
+            return [os.path.join(event_logs_dir, event_log) for event_log in event_logs]
+        return event_logs
+
+    @classmethod
+    def replace_cli_with_mock(cls, cli_name: str, temp_dir: str) -> None:
+        """
+        Replace the specified CLI in the PATH environment variable with a mock version that simulates the
+        command not being found.
+
+        :param cli_name: The name of the CLI command to replace in the PATH.
+        :param temp_dir: The temporary directory where the mock CLI will be created.
+        """
+        mock_cli_path = os.path.join(temp_dir, cli_name)
+        with open(mock_cli_path, "w") as f:
+            f.write("#!/bin/bash\n")
+            f.write(f"echo '{cli_name}: command not found'\n")
+            f.write("exit 1\n")
+        os.chmod(mock_cli_path, 0o755)
+        os.environ['PATH'] = temp_dir + ":" + os.environ['PATH']
+
+        # Verify the CLI is no longer resolvable in the PATH
+        cmd_result = cls.run_sys_cmd([cli_name])
+        cls.assert_sys_cmd_return_code(cmd_result, exp_return_code=1, error_msg=f"{cli_name} is still in the PATH")
+
+    @staticmethod
+    def get_logger() -> logging.Logger:
+        """
+        Create a logger for the module.
+        """
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(levelname)s: %(message)s'
+        )
+        return logging.getLogger(__name__)
+
+
+class HdfsStatus(EnumeratedType):
+    RUNNING = auto()
+    NOT_RUNNING = auto()
+    NOT_INSTALLED = auto()
+
+    @classmethod
+    def fromstring(cls, value: str) -> 'EnumeratedType':
+        return super().fromstring(value.replace(' ', '_'))
+
+
+class HdfsTestUtils:
+
+    @staticmethod
+    def setup_hdfs(should_run: bool) -> None:
+        """
+        Sets up the HDFS environment.
+
+        Executes a shell script to set up HDFS and configures the environment variables
+        required for HDFS. Depending on the `should_run` parameter, it either starts HDFS or simply
+        configures the environment without starting it.
+        :param should_run: Boolean flag to indicate whether to start HDFS.
+        """
+        try:
+            hdfs_setup_script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], 'hdfs', 'setup_hdfs.sh')
+            args = ["--run" if should_run else "--no-run"]
+            cmd_result = E2ETestUtils.run_sys_cmd([hdfs_setup_script] + args)
+            E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0, error_msg="Failed to setup HDFS")
+            hadoop_home = cmd_result.stdout.splitlines()[-1]
+            hadoop_conf_dir = os.path.join(hadoop_home, 'etc', 'hadoop')
+            assert os.path.exists(hadoop_home), f"HADOOP_HOME: {hadoop_home} does not exist"
+            os.environ['HADOOP_HOME'] = hadoop_home
+            if not should_run:
+                os.environ['HADOOP_CONF_DIR'] = hadoop_conf_dir
+            os.environ['PATH'] = f"{hadoop_home}/bin:{hadoop_home}/sbin:{os.environ['PATH']}"
+        except Exception as e:  # pylint: disable=broad-except
+            raise RuntimeError(f"Failed to setup HDFS.\nReason: {e}") from e
+
+    @staticmethod
+    def cleanup_hdfs() -> None:
+        """
+        Stops the HDFS and cleans up the environment.
+        """
+        hdfs_cleanup_script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], 'hdfs', 'cleanup_hdfs.sh')
+        try:
+            cmd_result = E2ETestUtils.run_sys_cmd([hdfs_cleanup_script])
+            E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0, error_msg="Failed to stop HDFS")
+        except Exception as e:  # pylint: disable=broad-except
+            raise RuntimeError(f"Failed to stop HDFS.\nReason: {e}") from e
+
+    @staticmethod
+    def hdfs_is_active() -> bool:
+        """
+        Check if HDFS is already active.
+        """
+        try:
+            output = subprocess.check_output(['jps'], text=True)
+            return 'NameNode' in output
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            raise RuntimeError("Failed to check if HDFS is running.") from e
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py b/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py
new file mode 100644
index 000000000..e1bd83933
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py
@@ -0,0 +1,172 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines steps to be used by the end-to-end tests using behave.
+"""
+
+import os
+import shutil
+import tempfile
+import threading
+from time import sleep
+from typing import Callable
+
+from behave import given, when, then
+
+from e2e_utils import E2ETestUtils, HdfsTestUtils, HdfsStatus
+
+logger = E2ETestUtils.get_logger()
+
+
+def set_after_scenario_fn(context, fn: Callable) -> None:
+    """
+    Set the callback function to be called after each scenario.
+
+    See also:
+        user_tools.tests.spark_rapids_tools_e2e.features.environment.after_scenario()
+    """
+    context.after_scenario_fn = fn
+
+
+@given('platform is "{platform}"')
+def step_set_platform(context, platform) -> None:
+    context.platform = platform
+
+
+@given('"{cli}" is not installed')
+def step_replace_cli_with_mock(context, cli) -> None:
+    original_path = os.environ["PATH"]
+    tempdir = tempfile.mkdtemp()
+
+    E2ETestUtils.replace_cli_with_mock(cli, tempdir)
+
+    def after_scenario_fn():
+        os.environ.update({"PATH": original_path})
+        shutil.rmtree(tempdir)
+
+    set_after_scenario_fn(context, after_scenario_fn)
+
+
+def _start_qualification_tool_crash_thread_internal(_stop_event: threading.Event) -> None:
+    """
+    Wait for the qualification tool JVM to start, then kill it to simulate a crash.
+    Polls once per second until the tool starts or the stop event is set.
+    :param _stop_event: Event to stop the thread
+    """
+    qual_tool_class_path = 'com.nvidia.spark.rapids.tool.qualification.QualificationMain'
+
+    def is_qual_tool_running() -> bool:
+        return E2ETestUtils.run_sys_cmd(["pgrep", "-f", f"java.*{qual_tool_class_path}"]).returncode == 0
+
+    while not _stop_event.is_set() and not is_qual_tool_running():
+        sleep(1)
+
+    if is_qual_tool_running():
+        cmd_result = E2ETestUtils.run_sys_cmd(["pkill", "-f", f"java.*{qual_tool_class_path}"])
+        E2ETestUtils.assert_sys_cmd_return_code(cmd_result,
+                                                exp_return_code=0,
+                                                error_msg="Failed to kill the qualification tool.")
+
+
+@given('thread to crash qualification tool has started')
+def step_start_qualification_tool_crash_thread(context) -> None:
+    stop_event = threading.Event()
+    qual_tool_thread = threading.Thread(target=_start_qualification_tool_crash_thread_internal, args=(stop_event,))
+    qual_tool_thread.start()
+
+    def after_scenario_fn():
+        stop_event.set()
+        qual_tool_thread.join()
+        stop_event.clear()
+
+    set_after_scenario_fn(context, after_scenario_fn)
+
+
+@given('HDFS is "{status}"')
+def step_setup_hdfs(context, status) -> None:
+    if HdfsTestUtils.hdfs_is_active():
+        raise RuntimeError('HDFS is already active. Please stop it before running the tests.')
+
+    test_hdfs_status = HdfsStatus.fromstring(status)
+    if test_hdfs_status == HdfsStatus.NOT_INSTALLED:
+        # Do nothing if HDFS should not be installed
+        return
+    if test_hdfs_status == HdfsStatus.RUNNING:
+        # Set up HDFS and start it
+        logger.warning('Setting up and starting HDFS. This may take a while.')
+        should_run = True
+    elif test_hdfs_status == HdfsStatus.NOT_RUNNING:
+        # Set up HDFS but do not start it
+        logger.warning('Setting up HDFS without starting it. This may take a while.')
+        should_run = False
+    else:
+        raise ValueError(f"HDFS status '{status}' is not valid.")
+
+    set_after_scenario_fn(context, HdfsTestUtils.cleanup_hdfs)
+    HdfsTestUtils.setup_hdfs(should_run)
+
+
+@given('HDFS has "{event_logs}" eventlogs')
+def step_hdfs_has_eventlogs(context, event_logs) -> None:
+    event_logs_list = E2ETestUtils.resolve_event_logs(event_logs.split(","))
+    hdfs_event_logs_dir = '/'
+    for event_log in event_logs_list:
+        hdfs_copy_cmd = ['hdfs', 'dfs', '-copyFromLocal', '-f', event_log, hdfs_event_logs_dir]
+        cmd_result = E2ETestUtils.run_sys_cmd(hdfs_copy_cmd)
+        E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0,
+                                                error_msg="Failed to copy event logs to HDFS")
+
+
+@when('spark-rapids tool is executed with "{event_logs}" eventlogs')
+def step_execute_spark_rapids_tool(context, event_logs) -> None:
+    event_logs_list = E2ETestUtils.resolve_event_logs(event_logs.split(","))
+    if hasattr(context, 'platform'):
+        cmd = E2ETestUtils.create_spark_rapids_cmd(event_logs_list, context.temp_dir, context.platform)
+    else:
+        cmd = E2ETestUtils.create_spark_rapids_cmd(event_logs_list, context.temp_dir)
+    context.result = E2ETestUtils.run_sys_cmd(cmd)
+
+
+@then('stderr contains the following')
+def step_verify_stderr(context) -> None:
+    expected_stderr_list = context.text.strip().split(";")
+    for stderr_line in expected_stderr_list:
+        assert stderr_line in context.result.stderr, \
+            (f"Expected stderr line '{stderr_line}' not found\n" +
+             E2ETestUtils.get_cmd_output_str(context.result))
+
+
+@then('stdout contains the following')
+def step_verify_stdout(context) -> None:
+    expected_stdout_list = context.text.strip().split(";")
+    for stdout_line in expected_stdout_list:
+        assert stdout_line in context.result.stdout, \
+            (f"Expected stdout line '{stdout_line}' not found\n" +
+             E2ETestUtils.get_cmd_output_str(context.result))
+
+
+@then('processed applications is "{expected_num_apps}"')
+def step_verify_num_apps(context, expected_num_apps) -> None:
+    actual_num_apps = -1
+    for stdout_line in context.result.stdout.splitlines():
+        if "Processed applications" in stdout_line:
+            actual_num_apps = int(stdout_line.split()[-1])
+    assert actual_num_apps == int(expected_num_apps), \
+        f"Expected: {expected_num_apps}, Actual: {actual_num_apps}"
+
+
+@then('return code is "{return_code:d}"')
+def step_verify_return_code(context, return_code) -> None:
+    assert context.result.returncode == return_code, \
+        f"Expected return code: {return_code}, Actual return code: {context.result.returncode}"
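The steps above share one cleanup convention: any step that mutates global state
registers an undo callback through `set_after_scenario_fn()`, which
`environment.after_scenario()` invokes once the scenario finishes. A hypothetical new
step following that convention (the step text and variable handling below are invented
for illustration and assume the imports of `test_steps.py`):

```python
@given('"{var}" environment variable is set to "{value}"')
def step_set_env_var(context, var, value) -> None:
    original = os.environ.get(var)
    os.environ[var] = value

    def after_scenario_fn():
        # Restore the variable to its pre-scenario state.
        if original is None:
            os.environ.pop(var, None)
        else:
            os.environ[var] = original

    set_after_scenario_fn(context, after_scenario_fn)
```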
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd
new file mode 100644
index 000000000..31ff77c93
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd
new file mode 100644
index 000000000..27f62c180
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd
new file mode 100644
index 000000000..8cfba577a
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd
new file mode 100644
index 000000000..2ff810859
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd
new file mode 100644
index 000000000..52470979e
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh
new file mode 100755
index 000000000..c92145ea1
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+readonly E2E_TEST_HDFS_DIR="${E2E_TEST_TMP_DIR}/hadoop"
+export E2E_TEST_HDFS_DIR E2E_TEST_TMP_DIR
+export LC_ALL=C
+
+err() {
+  echo "ERROR: $1" >&2
+  exit 1
+}
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
new file mode 100755
index 000000000..f2b0476ef
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script stops any running HDFS services and removes the HDFS directories.
+# Usage: ./cleanup_hdfs.sh
+
+readonly CURRENT_FILE_PATH=$(realpath "${0}")
+
+load_common_scripts() {
+  local scripts_dir=$(dirname "$(dirname "${CURRENT_FILE_PATH}")")
+  source "${scripts_dir}/common.sh"
+}
+
+# Stop HDFS services
+stop_hdfs_services() {
+  if jps | grep -q "NameNode\|DataNode"; then
+    echo "Stopping HDFS..."
+    local hadoop_home="${E2E_TEST_HDFS_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}"
+    local hdfs_bin="${hadoop_home}/bin/hdfs"
+    [ ! -f "${hdfs_bin}" ] && err "HDFS binary not found at ${hdfs_bin}. However, HDFS services are running."
+    ${hdfs_bin} --daemon stop namenode
+    ${hdfs_bin} --daemon stop datanode
+  else
+    echo "HDFS is not running."
+  fi
+}
+
+cleanup_hdfs_dir() {
+  rm -rf "${E2E_TEST_HDFS_DIR}"
+  echo "Removed HDFS directories."
+}
+
+main() {
+  load_common_scripts
+  stop_hdfs_services
+  cleanup_hdfs_dir
+}
+
+main
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh
new file mode 100755
index 000000000..10e979bf9
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script sets up and configures Hadoop HDFS. It optionally starts HDFS services
+# based on the provided HDFS_SHOULD_RUN flag.
+#
+# HDFS Configuration:
+#  - Replication factor: 1
+#  - Disk Space Quota: 2GB
+#  - Temp Directory: /tmp/spark_rapids_tools_e2e_tests
+#
+# Usage: ./setup_hdfs.sh --run|--no-run
+# Options:
+#   --run     Run HDFS services (default)
+#   --no-run  Do not run HDFS
+
+set -e
+
+usage() {
+  echo "Usage: $0 --run|--no-run" >&2
+  echo "Options:" >&2
+  echo "  --run     Run HDFS services (default)" >&2
+  echo "  --no-run  Do not run HDFS" >&2
+  exit 1
+}
+
+if [ $# -eq 0 ]; then
+  usage
+fi
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --run)
+      readonly HDFS_SHOULD_RUN=true
+      shift
+      ;;
+    --no-run)
+      readonly HDFS_SHOULD_RUN=false
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+echo "HDFS_SHOULD_RUN: ${HDFS_SHOULD_RUN}"
+readonly DEFAULT_CORE_SITE_XML="core-site.xml"
+readonly DEFAULT_HDFS_SITE_XML="hdfs-site.xml"
+readonly HDFS_SPACE_QUOTA="2g"
+readonly CURRENT_FILE_PATH=$(realpath "${0}")
+readonly HDFS_SCRIPTS_DIR=$(dirname "${CURRENT_FILE_PATH}")
+readonly VERIFY_HDFS_SERVICES_MAX_RETRY=3
+readonly VERIFY_HDFS_SERVICES_SLEEP_SEC=5
+
+load_common_scripts() {
+  local scripts_dir=$(dirname "${HDFS_SCRIPTS_DIR}")
+  source "${scripts_dir}/common.sh"
+}
+
+# Validate environment variables and dependencies
+validate_env() {
+  [ -z "${JAVA_HOME}" ] && err "JAVA_HOME is not set. Please set JAVA_HOME."
+  [ -z "${E2E_TEST_HADOOP_VERSION}" ] && err "E2E_TEST_HADOOP_VERSION is not set. Please set E2E_TEST_HADOOP_VERSION."
+  [ -z "${E2E_TEST_TMP_DIR}" ] && err "E2E_TEST_TMP_DIR is not set. Please set E2E_TEST_TMP_DIR (e.g. /tmp/spark_rapids_tools_e2e_tests)."
+  command -v jps >/dev/null || err "jps is not available. Please install JDK or add JDK bin directory to PATH."
+}
+
+# Set up HDFS directories
+setup_hdfs_dirs() {
+  echo "Setting up HDFS directories..."
+  readonly E2E_TEST_NAME_NODE_DIR="${E2E_TEST_HDFS_DIR}/namenode"
+  readonly E2E_TEST_DATA_NODE_DIR="${E2E_TEST_HDFS_DIR}/datanode"
+  rm -rf "${E2E_TEST_HDFS_DIR}" "${E2E_TEST_NAME_NODE_DIR}" "${E2E_TEST_DATA_NODE_DIR}"
+  mkdir -p "${E2E_TEST_HDFS_DIR}" "${E2E_TEST_NAME_NODE_DIR}" "${E2E_TEST_DATA_NODE_DIR}"
+  export E2E_TEST_NAME_NODE_DIR E2E_TEST_DATA_NODE_DIR
+}
+
+# Function to verify checksum
+verify_checksum() {
+  echo "Verifying checksum..."
+  if [ $# -ne 2 ]; then
+    err "verify_checksum requires two arguments: file and checksum_file."
+  fi
+  local file="$1"
+  local checksum_file="$2"
+  # Apache publishes checksums in the format: 'SHA512 (<file>) = <digest>',
+  # so the digest is the 4th whitespace-separated field.
+  local expected_checksum=$(awk '{print $4}' "${checksum_file}")
+  local actual_checksum=$(shasum -a 512 "${file}" | awk '{print $1}')
+  [ "${expected_checksum}" != "${actual_checksum}" ] && return 1 || return 0
+}
+
+# Function to download and extract Hadoop
+download_and_extract_hadoop() {
+  echo "Downloading and extracting Hadoop..."
+  local hadoop_url="https://dlcdn.apache.org/hadoop/common/hadoop-${E2E_TEST_HADOOP_VERSION}/hadoop-${E2E_TEST_HADOOP_VERSION}.tar.gz"
+  local hadoop_tar_file="${E2E_TEST_TMP_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}.tar.gz"
+  local checksum_url="${hadoop_url}.sha512"
+  local checksum_file="${hadoop_tar_file}.sha512"
+
+  if [ ! -f "${hadoop_tar_file}" ]; then
+    wget -O "${hadoop_tar_file}" "${hadoop_url}" || err "Failed to download Hadoop tarball."
+  fi
+
+  # Verify checksum and re-download if needed
+  wget -O "${checksum_file}" "${checksum_url}" || err "Failed to download checksum file."
+  if ! verify_checksum "${hadoop_tar_file}" "${checksum_file}"; then
+    wget -O "${hadoop_tar_file}" "${hadoop_url}" || err "Failed to download Hadoop tarball."
+    if ! verify_checksum "${hadoop_tar_file}" "${checksum_file}"; then
+      err "Checksum verification failed after re-downloading. Exiting..."
+    fi
+  fi
+
+  tar -xzf "${hadoop_tar_file}" -C "${E2E_TEST_HDFS_DIR}" || err "Failed to extract Hadoop tarball."
+}
+
+# Configure Hadoop
+configure_hadoop() {
+  echo "Configuring Hadoop..."
+  readonly HADOOP_HOME="${E2E_TEST_HDFS_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}"
+  readonly HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+  export HADOOP_HOME HADOOP_CONF_DIR
+  export PATH="${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin"
+  envsubst < "${HDFS_SCRIPTS_DIR}/templates/${DEFAULT_CORE_SITE_XML}" > "${HADOOP_HOME}/etc/hadoop/core-site.xml"
+  envsubst < "${HDFS_SCRIPTS_DIR}/templates/${DEFAULT_HDFS_SITE_XML}" > "${HADOOP_HOME}/etc/hadoop/hdfs-site.xml"
+}
+
+# Format the Namenode
+format_namenode() {
+  echo "Formatting the Namenode..."
+  yes | hdfs namenode -format || err "Failed to format Namenode."
+}
+
+# Start HDFS services
+start_hdfs_services() {
+  echo "Starting HDFS services..."
+  hdfs --daemon start namenode
+  hdfs --daemon start datanode
+}
+
+# Verify that HDFS services are running
+verify_hdfs_services() {
+  echo "Verifying HDFS services..."
+  jps | grep -q "NameNode" || err "Namenode is not running."
+  jps | grep -q "DataNode" || err "Datanode is not running."
+  hdfs dfs -ls / || err "Failed to list HDFS root directory."
+  hdfs dfsadmin -setSpaceQuota "${HDFS_SPACE_QUOTA}" / || err "Failed to set space quota of ${HDFS_SPACE_QUOTA}"
+  hdfs dfsadmin -report || err "Failed to get HDFS report."
+}
+
+verify_hdfs_services_with_retry() {
+  local max_retry=$1
+  local count=1
+  while [[ ${count} -le ${max_retry} ]]; do
+    echo "Attempt ${count} of ${max_retry}..."
+    if verify_hdfs_services; then
+      echo "HDFS services are running."
+      return 0
+    fi
+    echo "HDFS services verification failed. Retrying in ${VERIFY_HDFS_SERVICES_SLEEP_SEC} seconds..."
+    sleep ${VERIFY_HDFS_SERVICES_SLEEP_SEC}
+    ((count++))
+  done
+  return 1
+}
+
+main() {
+  load_common_scripts
+  validate_env
+  setup_hdfs_dirs
+  download_and_extract_hadoop
+  configure_hadoop
+  if [ "${HDFS_SHOULD_RUN}" = true ]; then
+    format_namenode
+    start_hdfs_services
+    verify_hdfs_services_with_retry ${VERIFY_HDFS_SERVICES_MAX_RETRY} || err "Failed to start HDFS services after ${VERIFY_HDFS_SERVICES_MAX_RETRY} attempts."
+  fi
+  echo "${HADOOP_HOME}"
+}
+
+main
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml
new file mode 100644
index 000000000..04958b67e
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (c) 2024, NVIDIA CORPORATION.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://localhost:9000</value>
+  </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>${E2E_TEST_DATA_NODE_DIR}</value>
+  </property>
+</configuration>
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml
new file mode 100644
index 000000000..d68093caa
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (c) 2024, NVIDIA CORPORATION.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>file:${E2E_TEST_NAME_NODE_DIR}</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>file:${E2E_TEST_DATA_NODE_DIR}</value>
+  </property>
+</configuration>
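The two templates above are rendered by `envsubst` in `configure_hadoop()`, which
replaces the `${E2E_TEST_*}` references with values exported by the setup scripts.
A rough Python analogue of that rendering step (file paths illustrative):

```python
import os
from string import Template

with open('templates/hdfs-site.xml', encoding='utf-8') as src:
    # string.Template uses the same ${VAR} syntax as envsubst.
    rendered = Template(src.read()).substitute(os.environ)
with open('hdfs-site.xml', 'w', encoding='utf-8') as dst:
    dst.write(rendered)
```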
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh
new file mode 100755
index 000000000..c8c523213
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+err () {
+  echo "ERROR: $1" >&2
+  exit 1
+}
+
+if [ -z "$E2E_TEST_TOOLS_DIR" ]; then
+  err "Please set E2E_TEST_TOOLS_DIR to the root directory of the spark-rapids-tools repository. Exiting script."
+fi
+
+if [ -z "$E2E_TEST_SPARK_BUILD_VERSION" ]; then
+  err "Please set E2E_TEST_SPARK_BUILD_VERSION to the version of Spark used for building Tools JAR. Exiting script."
+fi
+
+if [ -z "$E2E_TEST_HADOOP_VERSION" ]; then
+  err "Please set E2E_TEST_HADOOP_VERSION to the version of Hadoop used for building Tools JAR. Exiting script."
+fi
+
+build_jar() {
+  local jar_tools_dir="$E2E_TEST_TOOLS_DIR/core"
+  echo "Building Spark RAPIDS Tools JAR file"
+  pushd "$jar_tools_dir"
+  mvn package -DskipTests -Dbuildver="$E2E_TEST_SPARK_BUILD_VERSION" -Dhadoop.version="$E2E_TEST_HADOOP_VERSION"
+  popd
+}
+
+install_python_package() {
+  if [ -z "$E2E_TEST_VENV_DIR" ]; then
+    err "Please set E2E_TEST_VENV_DIR to the name of the virtual environment. Exiting script."
+  fi
+
+  echo "Setting up Python environment in $E2E_TEST_VENV_DIR"
+  local python_tools_dir="$E2E_TEST_TOOLS_DIR/user_tools"
+  python -m venv "$E2E_TEST_VENV_DIR"
+  source "$E2E_TEST_VENV_DIR"/bin/activate
+
+  echo "Installing Spark RAPIDS Tools Python package"
+  pushd "$python_tools_dir"
+  pip install --upgrade pip setuptools wheel
+  pip install .
+  popd
+}
+
+# Check if the Tools JAR file exists or if the user wants to build it
+if [ ! -f "$E2E_TEST_TOOLS_JAR_PATH" ] || [ "$E2E_TEST_BUILD_JAR" = "true" ]; then
+  build_jar
+fi
+
+install_python_package
diff --git a/user_tools/tox.ini b/user_tools/tox.ini
index 835cfb999..b7a766d98 100644
--- a/user_tools/tox.ini
+++ b/user_tools/tox.ini
@@ -9,20 +9,22 @@ envlist =
     coverage
     pylint
     flake8
+    behave
 isolated_build = True
 
 [gh-actions]
 python =
-    3.8: python3.8, pylint, flake8
-    3.9: python3.9, pylint, flake8
-    3.10: python3.10, pylint, flake8
-    3.11: python3.11, pylint, flake8
+    3.8: python3.8, pylint, flake8, behave
+    3.9: python3.9, pylint, flake8, behave
+    3.10: python3.10, pylint, flake8, behave
+    3.11: python3.11, pylint, flake8, behave
 
 [testenv]
 deps =
     pytest
     pytest-cov
    cli_test_helpers
+    behave
 setenv =
    COVERAGE_FILE = {env:COVERAGE_FILE:{toxworkdir}/.coverage.{envname}}
 commands =
@@ -63,3 +65,23 @@ commands = flake8 \
     extend-ignore = E501,
     exclude = .tox,build,dist
+
+[testenv:behave]
+deps = behave
+passenv = JAVA_HOME
+commands = behave {posargs}
+
+[behave]
+paths = tests/spark_rapids_tools_e2e/features
+stderr_capture = false
+
+[behave.userdata]
+# Default maven arguments for building the Tools JAR
+buildver = 350
+hadoop.version = 3.3.6
+# Default arguments for the behave tests
+scala_version = 2.12
+venv_name = spark_rapids_tools_e2e_tests_venv
+setup_script_file = setup_env.sh
+build_jar = true
+e2e_tests_tmp_dir = /tmp/spark_rapids_tools_e2e_tests