diff --git a/scripts/header-check.sh b/scripts/header-check.sh
index 22a1847fc..b9c7d2325 100755
--- a/scripts/header-check.sh
+++ b/scripts/header-check.sh
@@ -26,9 +26,10 @@ EXCLUDE_PATTERNS=(
     "core/src/main/resources/*"
     "core/src/test/resources/*"
     "user_tools/src/spark_rapids_pytools/resources/*"
-    "user_tools/docs/resources/*"
+    "user_tools/docs/*"
     "user_tools/tests/spark_rapids_tools_ut/resources/*"
     "*.csv"
+    "*.zstd"
 )
 
 # Create the grep exclude options (--exclude=*csv --exclude=core/src/test/resources/*)
diff --git a/user_tools/docs/resources/debug-behave-intellij.png b/user_tools/docs/resources/debug-behave-intellij.png
new file mode 100644
index 000000000..a21c69587
Binary files /dev/null and b/user_tools/docs/resources/debug-behave-intellij.png differ
diff --git a/user_tools/docs/tools_e2e_tests.md b/user_tools/docs/tools_e2e_tests.md
new file mode 100644
index 000000000..6cd9a4d93
--- /dev/null
+++ b/user_tools/docs/tools_e2e_tests.md
@@ -0,0 +1,172 @@
+# Spark Rapids Tools End-to-End Behavior Tests
+
+This document outlines the end-to-end tests for Spark Rapids tools, designed to cover scenarios such as missing
+dependencies, handling different types of event logs, and interacting with HDFS.
+
+## Directory Structure
+```commandline
+user_tools/tests/spark_rapids_tools_e2e/
+├── features               # Contains test scenarios and environment setup.
+│   ├── environment.py     # Setup and teardown procedures for the tests.
+│   ├── steps              # Step definitions for the tests.
+│   └── *.feature          # Feature files defining test scenarios.
+└── resources              # Resources used in the tests.
+    ├── event_logs
+    └── scripts            # Scripts used in the tests.
+```
+Configurations for `behave` tests are defined in the `user_tools/tox.ini` file.
+
+## Setup
+
+From the `/user_tools` directory, run the following command to install the required dependencies:
+
+```sh
+pip install behave
+# or
+pip install .[test]
+```
+
+## Running Tests
+Tests can be run with the `behave` command directly or through `tox`.
+
+**Basic Usage:**
+
+```sh
+behave <options>
+# or
+tox -e behave -- <options>
+```
+
+**Run All Tests:**
+
+```sh
+behave
+# or
+tox -e behave
+```
+
+### Common Options
+
+**Run Specific Tests by Tag**
+
+See the [Tags Format](#tags-format) section for more information on tags.
+
+```sh
+behave --tags <tag>
+# or
+tox -e behave -- --tags <tag>
+```
+
+**Run Specific Tests by Name**
+
+```sh
+behave --name <test_name>
+# or
+tox -e behave -- --name <test_name>
+```
+
+**Skip Tests by Tag**
+
+```sh
+behave --tags ~<tag>
+# or
+tox -e behave -- --tags ~<tag>
+```
+
+**Custom Arguments**
+- Custom arguments can be passed to the behave tests using the `-D` flag.
+- Example: Skip building the Tools jar during setup.
+
+```sh
+behave -D build_jar=false  # Skip building the Tools jar during setup (default: true)
+# or
+tox -e behave -- -D build_jar=false
+```
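+
+For illustration, a minimal sketch (not a complete hook) of how a `-D` value surfaces
+inside the behave hooks — defaults come from the `[behave.userdata]` section in
+`tox.ini`, and `-D key=value` on the command line overrides them:
+
+```python
+# Illustrative sketch only; mirrors how environment.py consumes these flags.
+def before_all(context):
+    # '-D build_jar=false' arrives as the string 'false'; the default comes
+    # from [behave.userdata] in the behave config.
+    build_jar = context.config.userdata.get('build_jar', 'true')
+    if build_jar.lower() in ('true', '1', 'yes'):
+        print('Tools JAR will be built during setup')
+```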
+
+**Verbose Mode**
+- When verbose mode is enabled, `STDOUT` and `STDERR` from all subprocesses executed during the test run are shown in the console.
+```sh
+behave -v
+# or
+tox -e behave -- -v
+```
+
+## Notes
+
+### Tags Format
+Tags are used to uniquely identify test scenarios and are defined in the following format: `@test_id_<feature_acronym>_<00xx>`.
+- `<feature_acronym>`: Acronym for the feature file being tested. Examples:
+  - `ELP` for `event_log_processing.feature`
+  - `IC` for `installation_checks.feature`
+- `<00xx>`: Unique 4-digit identifier for the test scenario. Examples: `0001`, `0002`.
+
+Tags Example: `@test_id_ELP_0001`, `@test_id_IC_0002`.
+
+### Built-in Setup Steps
+
+The tests include the following setup steps:
+
+1. Build Spark Rapids Tools JAR:
+   - By default, the JAR is built before running the tests.
+   - To skip this step (e.g., if the JAR is already built), use the argument `-D build_jar=false`.
+2. Build the Python Package.
+
+The test warns the user that this initial setup may take a few minutes.
+
+### Built-in HDFS Cluster Setup
+
+- Some of the tests include configuring a local HDFS cluster. Step: `HDFS is "{status}"`
+- This step downloads Hadoop binaries and sets up the cluster.
+  - The download occurs only once per machine, but the cluster is set up for each test run.
+  - The download step may take a few minutes.
+- Tests involving HDFS are tagged with `@long_running` and can be skipped using `--tags ~@long_running`.
+
+#### HDFS Configuration
+- Replication factor: 1
+- Disk Space Quota: 2GB
+- Temp Directory: `/tmp/spark_rapids_tools_e2e_tests`
+  - The temp directory can be changed using the argument `-D e2e_tests_tmp_dir=<dir>` during the test run.
+
+#### Cleanup
+- The step `HDFS is "{status}"` sets an after-scenario hook to stop the HDFS cluster and remove the temporary directories.
+- It does not clean up the Hadoop binaries downloaded during the setup.
+- Cleanup can be done manually using the script below:
+```sh
+/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
+```
+
+## Debugging Tests in IDE
+
+- Ensure the Python interpreter is set to the correct virtual environment and `JAVA_HOME` is set.
+
+**IntelliJ**
+- Add a Python run configuration with module name: `behave` and working directory: `/user_tools`.
+- Add required arguments in the `Script parameters` field.
+
+Sample Run Configuration:
+![Debugging behave tests in IntelliJ](resources/debug-behave-intellij.png)
+
+**VS Code**
+- Open or create the `.vscode/launch.json` file. Add the following configuration with required arguments:
+```json
+{
+  "configurations": [
+    {
+      "name": "Python: Spark Rapids Tools E2E Tests",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "behave",
+      "args": [],
+      "python": "${command:python.interpreterPath}",
+      "cwd": "${workspaceFolder}/user_tools"
+    }
+  ]
+}
+```
+
+## Guidelines for Writing Tests
+
+TODO: Add guidelines and conventions for writing tests.
diff --git a/user_tools/pyproject.toml b/user_tools/pyproject.toml
index b4fdf1ed7..39d46ebfe 100644
--- a/user_tools/pyproject.toml
+++ b/user_tools/pyproject.toml
@@ -76,7 +76,7 @@ version = {attr = "spark_rapids_pytools.__version__"}
 repository = "https://github.com/NVIDIA/spark-rapids-tools/tree/main"
 
 [project.optional-dependencies]
 test = [
-    "tox", 'pytest', 'cli_test_helpers'
+    "tox", 'pytest', 'cli_test_helpers', 'behave'
 ]
 qualx = [
     "holoviews",
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/environment.py b/user_tools/tests/spark_rapids_tools_e2e/features/environment.py
new file mode 100644
index 000000000..228f3e2dd
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/environment.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines environment setup and teardown functions for the end-to-end tests using behave.
+"""
+
+import os
+import shutil
+import tempfile
+
+from spark_rapids_tools.utils import Utilities
+from steps.e2e_utils import E2ETestUtils
+
+""" Define behave hooks for the tests. These hooks are automatically called by behave. """
+
+logger = E2ETestUtils.get_logger()
+
+
+def before_all(context) -> None:
+    """
+    Set up the environment for the tests. This function is automatically called before all the tests.
+    """
+    context.temp_dir = tempfile.mkdtemp()
+    _set_environment_variables(context)
+    _set_verbose_mode(context)
+    _setup_env(context)
+
+
+def after_all(context) -> None:
+    """
+    Clean up the environment after the tests. This function is automatically called after all the tests.
+    """
+    _clear_environment_variables()
+    shutil.rmtree(context.temp_dir)
+
+
+def before_scenario(context, scenario) -> None:
+    if "skip" in scenario.effective_tags:
+        scenario.skip("Marked with @skip")
+        return
+
+
+def after_scenario(context, scenario) -> None:
+    """
+    Clean up the environment after each scenario. This function is automatically called after each scenario.
+    Steps must set the callback function using set_after_scenario_fn() to perform any cleanup.
+    """
+    if hasattr(context, 'after_scenario_fn'):
+        context.after_scenario_fn()
+
+
+def _set_verbose_mode(context) -> None:
+    verbose_enabled = getattr(context.config, 'verbose', False)
+    if verbose_enabled:
+        context.config.stdout_capture = False
+        context.config.stderr_capture = False
+    os.environ['E2E_TEST_VERBOSE_MODE'] = str(verbose_enabled).lower()
+
+
+def _set_environment_variables(context) -> None:
+    """
+    Set environment variables needed for the virtual environment setup.
+    """
+    tools_version = Utilities.get_base_release()
+    scala_version = context.config.userdata.get('scala_version')
+    venv_name = context.config.userdata.get('venv_name')
+    jar_filename = f'rapids-4-spark-tools_{scala_version}-{tools_version}-SNAPSHOT.jar'
+    build_jar_value = context.config.userdata.get('build_jar')
+    build_jar = build_jar_value.lower() in ['true', '1', 'yes']
+
+    os.environ['E2E_TEST_TOOLS_DIR'] = E2ETestUtils.get_tools_root_path()
+    os.environ['E2E_TEST_SCRIPTS_DIR'] = os.path.join(E2ETestUtils.get_e2e_tests_resource_path(), 'scripts')
+    os.environ['E2E_TEST_TOOLS_JAR_PATH'] = os.path.join(os.environ['E2E_TEST_TOOLS_DIR'],
+                                                         f'core/target/{jar_filename}')
+    os.environ['E2E_TEST_VENV_DIR'] = os.path.join(context.temp_dir, venv_name)
+    os.environ['E2E_TEST_BUILD_JAR'] = 'true' if build_jar else 'false'
+    os.environ['E2E_TEST_SPARK_BUILD_VERSION'] = context.config.userdata.get('buildver')
+    os.environ['E2E_TEST_HADOOP_VERSION'] = context.config.userdata.get('hadoop.version')
+    os.environ['E2E_TEST_TMP_DIR'] = context.config.userdata.get('e2e_tests_tmp_dir')
+
+
+def _setup_env(context) -> None:
+    """
+    Build the JAR and set up the virtual environment for the tests.
+    """
+    script_file_name = context.config.userdata.get('setup_script_file')
+    script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], script_file_name)
+    try:
+        warning_msg = "Setting up the virtual environment for the tests. This may take a while."
+        if os.environ.get('E2E_TEST_BUILD_JAR') == 'true':
+            warning_msg = f'Building JAR and {warning_msg}'
+        logger.warning(warning_msg)
+        result = E2ETestUtils.run_sys_cmd([script])
+        E2ETestUtils.assert_sys_cmd_return_code(result,
+                                                exp_return_code=0,
+                                                error_msg="Failed to create virtual environment")
+    except Exception as e:  # pylint: disable=broad-except
+        raise RuntimeError(f"Failed to create virtual environment. Reason: {str(e)}") from e
+
+
+def _clear_environment_variables() -> None:
+    """
+    Clear environment variables set for the virtual environment setup.
+    """
+    env_vars = ['E2E_TEST_SCRIPTS_DIR', 'E2E_TEST_VENV_DIR', 'E2E_TEST_TOOLS_JAR_PATH']
+    for key in env_vars:
+        os.environ.pop(key, None)
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature b/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature
new file mode 100644
index 000000000..6a3f97cb0
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/event_log_processing.feature
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Feature: Event Log Processing
+
+  @test_id_ELP_0001
+  Scenario Outline: Tool spark_rapids runs with different types of event logs
+    When spark-rapids tool is executed with "<event_logs>" eventlogs
+    Then stderr contains the following
+      """
+      <expected_stderr>
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | event_logs                         | expected_stderr                                                                                                        |
+      | invalid_path_eventlog              | process.failure.count = 1;invalid_path_eventlog not found, skipping!                                                   |
+      | gpu_eventlog.zstd                  | process.skipped.count = 1;GpuEventLogException: Cannot parse event logs from GPU run: skipping this file               |
+      | photon_eventlog.zstd               | process.skipped.count = 1;PhotonEventLogException: Encountered Databricks Photon event log: skipping this file!        |
+      | streaming_eventlog.zstd            | process.skipped.count = 1;StreamingEventLogException: Encountered Spark Structured Streaming Job: skipping this file!  |
+      | incorrect_app_status_eventlog.zstd | process.NA.count = 1;IncorrectAppStatusException: Application status is incorrect. Missing AppInfo                     |
+
+  @test_id_ELP_0002
+  Scenario: Qualification tool JAR crashes
+    Given thread to crash qualification tool has started
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stderr contains the following
+      """
+      Qualification. Raised an error in phase [Execution]
+      """
+    And return code is "1"
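Note that the `expected_stderr` column above packs several expected substrings into a
single cell separated by `;`; the `Then stderr contains the following` step (defined in
`test_steps.py` later in this patch) splits on `;` and asserts each fragment
independently. A minimal sketch of that convention (the sample output shape is
illustrative):

```python
# Each ';'-separated fragment must appear somewhere in the captured stderr.
captured_stderr = (
    'process.failure.count = 1\n'
    '... invalid_path_eventlog not found, skipping! ...\n'
)
expected = 'process.failure.count = 1;invalid_path_eventlog not found, skipping!'
for fragment in expected.split(';'):
    assert fragment in captured_stderr, f'missing stderr fragment: {fragment}'
```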
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature b/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature
new file mode 100644
index 000000000..714ddfc2b
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/hdfs_storage.feature
@@ -0,0 +1,88 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@long_running
+Feature: HDFS Event Log Storage
+
+  @test_id_HDFS_0001
+  Scenario Outline: Eventlogs are stored in HDFS - Platform specified
+    Given platform is "<platform>"
+    And HDFS is "running"
+    And HDFS has "<eventlog>" eventlogs
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | platform | eventlog                       |
+      | onprem   | join_agg_on_yarn_eventlog.zstd |
+      | dataproc | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0002
+  Scenario Outline: Eventlogs are stored in HDFS - Platform not specified
+    Given HDFS is "running"
+    And HDFS has "<eventlog>" eventlogs
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0003
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS installed but not running
+    Given HDFS is "not running"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0004
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS not installed, Platform specified
+    Given platform is "onprem"
+    And HDFS is "not installed"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!;Incomplete HDFS URI
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
+
+  @test_id_HDFS_0005
+  Scenario Outline: Eventlogs are stored in HDFS - HDFS not installed, Platform not specified
+    Given HDFS is "not installed"
+    When spark-rapids tool is executed with "hdfs:///<eventlog>" eventlogs
+    Then stderr contains the following
+      """
+      EventLogPathProcessor: Unexpected exception occurred reading hdfs:///<eventlog>, skipping!;Incomplete HDFS URI
+      """
+    And processed applications is "0"
+    And return code is "0"
+
+    Examples:
+      | eventlog                       |
+      | join_agg_on_yarn_eventlog.zstd |
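The quoted statuses in the `HDFS is "<status>"` steps above ("running", "not running",
"not installed") are mapped onto an enum by the step implementation;
`HdfsStatus.fromstring()` (defined in `e2e_utils.py` later in this patch) converts
spaces to underscores before the lookup. A usage sketch:

```python
# Space-separated status strings from the feature files resolve to enum members.
HdfsStatus.fromstring('not running')    # -> HdfsStatus.NOT_RUNNING
HdfsStatus.fromstring('not installed')  # -> HdfsStatus.NOT_INSTALLED
```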
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature b/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature
new file mode 100644
index 000000000..3edce082f
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/installation_checks.feature
@@ -0,0 +1,44 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Feature: Tool Installation Checks
+
+  @test_id_IC_0001
+  Scenario Outline: Environment has missing CLI and spark_rapids tool processes eventlogs
+    Given platform is "<platform>"
+    And "<cli>" is not installed
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stdout contains the following
+      """
+      <expected_stdout>
+      """
+    And processed applications is "1"
+    And return code is "0"
+
+    Examples:
+      | platform         | cli    | expected_stdout                |
+      | dataproc         | gcloud | 2 x n1-standard-16 (4 T4 each) |
+      | emr              | aws    | 10 x g5.xlarge                 |
+      | databricks-aws   | aws    | 10 x g5.xlarge                 |
+      | databricks-azure | az     | 2 x Standard_NC64as_T4_v3      |
+
+  @test_id_IC_0002
+  Scenario: Environment has missing java
+    Given "java" is not installed
+    When spark-rapids tool is executed with "join_agg_on_yarn_eventlog.zstd" eventlogs
+    Then stderr contains the following
+      """
+      RuntimeError: Error invoking CMD
+      """
+    And return code is "1"
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py b/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py
new file mode 100644
index 000000000..4468c0050
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/steps/e2e_utils.py
@@ -0,0 +1,225 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines utility functions used by the end-to-end tests using behave.
+"""
+
+import logging
+import os
+import subprocess
+from dataclasses import dataclass
+from enum import auto
+from pathlib import Path
+from typing import List
+from urllib.parse import urlparse
+
+from spark_rapids_tools import EnumeratedType
+
+
+@dataclass
+class E2ETestUtils:
+
+    @staticmethod
+    def get_cmd_output_str(cmd_result: subprocess.CompletedProcess) -> str:
+        """
+        Get the output of a command as a string (stdout and stderr).
+        """
+        dash_line = '-' * 50
+        cmd_sections = [("COMMAND", ' '.join(cmd_result.args)),
+                        ("STDOUT", cmd_result.stdout),
+                        ("STDERR", cmd_result.stderr)]
+        output_sections = []
+        for label, content in cmd_sections:
+            content = content.strip() if content else "No output"
+            output_sections.append(f"{dash_line}\n{label}\n{dash_line}\n{content}\n{dash_line}")
+        return '\n'.join(output_sections).strip() + '\n'
+
+    @classmethod
+    def run_sys_cmd(cls, cmd: list) -> subprocess.CompletedProcess:
+        """
+        Run a system command and return the result.
+        If verbose mode is enabled in the behave config, print the command and its output.
+        """
+        cmd_result = subprocess.run(cmd, capture_output=True, text=True)
+        if cls.is_verbose_mode():
+            print(cls.get_cmd_output_str(cmd_result))
+        return cmd_result
+
+    @classmethod
+    def assert_sys_cmd_return_code(cls,
+                                   cmd_result: subprocess.CompletedProcess,
+                                   exp_return_code: int = 0,
+                                   error_msg: str = None) -> None:
+        assert cmd_result.returncode == exp_return_code, \
+            f"{error_msg}\n{cls.get_cmd_output_str(cmd_result)}"
+
+    @classmethod
+    def create_spark_rapids_cmd(cls,
+                                event_logs: List[str],
+                                output_dir: str,
+                                platform: str = 'onprem',
+                                filter_apps: str = 'all') -> List[str]:
+        """
+        Create the command to run the Spark Rapids qualification tool.
+        TODO: We can add more options to the command as needed.
+        """
+        return [
+            cls.get_spark_rapids_cli(),
+            'qualification',
+            '--platform', platform,
+            '--eventlogs', ','.join(event_logs),
+            '-o', output_dir,
+            '--tools_jar', cls.get_tools_jar_file(),
+            '--verbose',
+            '--filter_apps', filter_apps
+        ]
+
+    # Utility getter functions
+    @staticmethod
+    def get_tools_root_path() -> str:
+        return str(Path(__file__).parents[5])
+
+    @staticmethod
+    def get_e2e_tests_root_path() -> str:
+        return str(Path(__file__).parents[2])
+
+    @classmethod
+    def get_e2e_tests_config_file(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_root_path(), 'behave.ini')
+
+    @classmethod
+    def get_e2e_tests_resource_path(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_root_path(), 'resources')
+
+    @classmethod
+    def get_local_event_logs_dir(cls) -> str:
+        return os.path.join(cls.get_e2e_tests_resource_path(), 'event_logs')
+
+    @staticmethod
+    def get_spark_rapids_cli() -> str:
+        return os.path.join(os.environ['E2E_TEST_VENV_DIR'], 'bin', 'spark_rapids')
+
+    @staticmethod
+    def get_tools_jar_file() -> str:
+        return os.environ['E2E_TEST_TOOLS_JAR_PATH']
+
+    @staticmethod
+    def is_verbose_mode() -> bool:
+        return os.environ['E2E_TEST_VERBOSE_MODE'].lower() == 'true'
+
+    @classmethod
+    def resolve_event_logs(cls, event_logs: List[str]) -> List[str]:
+        """
+        Get the full path of the event logs if they are local files.
+        """
+        # Base directory can be modified (e.g. separate for local and CICD runs).
+        # Bare file names resolve under resources/event_logs; URIs with a scheme
+        # (e.g. hdfs:///) are returned unchanged.
+        fs = urlparse(event_logs[0]).scheme
+        if not fs or fs == 'file':
+            event_logs_dir = cls.get_local_event_logs_dir()
+            return [os.path.join(event_logs_dir, event_log) for event_log in event_logs]
+        return event_logs
+
+    @classmethod
+    def replace_cli_with_mock(cls, cli_name: str, temp_dir: str) -> None:
+        """
+        Replace the specified CLI in the PATH environment variable with a mock version that simulates the
+        command not being found.
+
+        :param cli_name: The name of the CLI command to replace in the PATH.
+        :param temp_dir: The temporary directory where the mock CLI will be created.
+        """
+        mock_cli_path = os.path.join(temp_dir, cli_name)
+        with open(mock_cli_path, "w") as f:
+            f.write("#!/bin/bash\n")
+            f.write(f"echo '{cli_name}: command not found'\n")
+            f.write("exit 1\n")
+        os.chmod(mock_cli_path, 0o755)
+        os.environ['PATH'] = temp_dir + ":" + os.environ['PATH']
+
+        # Verify the CLI is no longer resolvable in the PATH
+        cmd_result = cls.run_sys_cmd([cli_name])
+        cls.assert_sys_cmd_return_code(cmd_result, exp_return_code=1, error_msg=f"{cli_name} is still in the PATH")
+
+    @staticmethod
+    def get_logger() -> logging.Logger:
+        """
+        Create a logger for the module.
+        """
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(levelname)s: %(message)s'
+        )
+        return logging.getLogger(__name__)
+
+
+class HdfsStatus(EnumeratedType):
+    RUNNING = auto()
+    NOT_RUNNING = auto()
+    NOT_INSTALLED = auto()
+
+    @classmethod
+    def fromstring(cls, value: str) -> 'EnumeratedType':
+        return super().fromstring(value.replace(' ', '_'))
+
+
+class HdfsTestUtils:
+
+    @staticmethod
+    def setup_hdfs(should_run: bool) -> None:
+        """
+        Sets up the HDFS environment.
+
+        Executes a shell script to set up HDFS and configures the environment variables
+        required for HDFS. Depending on the `should_run` parameter, it either starts HDFS or simply
+        configures the environment without starting it.
+        :param should_run: Boolean flag to indicate whether to start HDFS.
+        """
+        try:
+            hdfs_setup_script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], 'hdfs', 'setup_hdfs.sh')
+            args = ["--run" if should_run else "--no-run"]
+            cmd_result = E2ETestUtils.run_sys_cmd([hdfs_setup_script] + args)
+            E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0, error_msg="Failed to setup HDFS")
+            hadoop_home = cmd_result.stdout.splitlines()[-1]
+            hadoop_conf_dir = os.path.join(hadoop_home, 'etc', 'hadoop')
+            assert os.path.exists(hadoop_home), f"HADOOP_HOME: {hadoop_home} does not exist"
+            os.environ['HADOOP_HOME'] = hadoop_home
+            if not should_run:
+                os.environ['HADOOP_CONF_DIR'] = hadoop_conf_dir
+            os.environ['PATH'] = f"{hadoop_home}/bin:{hadoop_home}/sbin:{os.environ['PATH']}"
+        except Exception as e:  # pylint: disable=broad-except
+            raise RuntimeError(f"Failed to setup HDFS.\nReason: {e}") from e
+
+    @staticmethod
+    def cleanup_hdfs() -> None:
+        """
+        Stops the HDFS and cleans up the environment.
+        """
+        hdfs_cleanup_script = os.path.join(os.environ['E2E_TEST_SCRIPTS_DIR'], 'hdfs', 'cleanup_hdfs.sh')
+        try:
+            cmd_result = E2ETestUtils.run_sys_cmd([hdfs_cleanup_script])
+            E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0, error_msg="Failed to stop HDFS")
+        except Exception as e:  # pylint: disable=broad-except
+            raise RuntimeError(f"Failed to stop HDFS.\nReason: {e}") from e
+
+    @staticmethod
+    def hdfs_is_active() -> bool:
+        """
+        Check if HDFS is already active.
+        """
+        try:
+            output = subprocess.check_output(['jps'], text=True)
+            return 'NameNode' in output
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            raise RuntimeError("Failed to check if HDFS is running.") from e
diff --git a/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py b/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py
new file mode 100644
index 000000000..e1bd83933
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/features/steps/test_steps.py
@@ -0,0 +1,172 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module defines steps to be used by the end-to-end tests using behave.
+"""
+
+import os
+import shutil
+import tempfile
+import threading
+from time import sleep
+from typing import Callable
+
+from behave import given, when, then
+
+from e2e_utils import E2ETestUtils, HdfsTestUtils, HdfsStatus
+
+logger = E2ETestUtils.get_logger()
+
+
+def set_after_scenario_fn(context, fn: Callable) -> None:
+    """
+    Set the callback function to be called after each scenario.
+
+    See also:
+        user_tools.tests.spark_rapids_tools_e2e.features.environment.after_scenario()
+    """
+    context.after_scenario_fn = fn
+
+
+@given('platform is "{platform}"')
+def step_set_platform(context, platform) -> None:
+    context.platform = platform
+
+
+@given('"{cli}" is not installed')
+def step_replace_cli_with_mock(context, cli) -> None:
+    original_path = os.environ["PATH"]
+    tempdir = tempfile.mkdtemp()
+
+    E2ETestUtils.replace_cli_with_mock(cli, tempdir)
+
+    def after_scenario_fn():
+        os.environ.update({"PATH": original_path})
+        shutil.rmtree(tempdir)
+
+    set_after_scenario_fn(context, after_scenario_fn)
+
+
+def _start_qualification_tool_crash_thread_internal(_stop_event: threading.Event) -> None:
+    """
+    Wait for the qualification tool JVM to start, then kill it to simulate a crash.
+    Polls once per second until the tool starts or the stop event is set.
+    :param _stop_event: Event to stop the thread
+    """
+    qual_tool_class_path = 'com.nvidia.spark.rapids.tool.qualification.QualificationMain'
+
+    def is_qual_tool_running() -> bool:
+        return E2ETestUtils.run_sys_cmd(["pgrep", "-f", f"java.*{qual_tool_class_path}"]).returncode == 0
+
+    while not _stop_event.is_set() and not is_qual_tool_running():
+        sleep(1)
+
+    if is_qual_tool_running():
+        cmd_result = E2ETestUtils.run_sys_cmd(["pkill", "-f", f"java.*{qual_tool_class_path}"])
+        E2ETestUtils.assert_sys_cmd_return_code(cmd_result,
+                                                exp_return_code=0,
+                                                error_msg="Failed to kill the qualification tool.")
+
+
+@given('thread to crash qualification tool has started')
+def step_start_qualification_tool_crash_thread(context) -> None:
+    stop_event = threading.Event()
+    qual_tool_thread = threading.Thread(target=_start_qualification_tool_crash_thread_internal, args=(stop_event,))
+    qual_tool_thread.start()
+
+    def after_scenario_fn():
+        stop_event.set()
+        qual_tool_thread.join()
+        stop_event.clear()
+
+    set_after_scenario_fn(context, after_scenario_fn)
+
+
+@given('HDFS is "{status}"')
+def step_setup_hdfs(context, status) -> None:
+    if HdfsTestUtils.hdfs_is_active():
+        raise RuntimeError('HDFS is already active. Please stop it before running the tests.')
+
+    test_hdfs_status = HdfsStatus.fromstring(status)
+    if test_hdfs_status == HdfsStatus.NOT_INSTALLED:
+        # Do nothing if HDFS should not be installed
+        return
+    if test_hdfs_status == HdfsStatus.RUNNING:
+        # Set up HDFS and start it
+        logger.warning('Setting up and starting HDFS. This may take a while.')
+        should_run = True
+    elif test_hdfs_status == HdfsStatus.NOT_RUNNING:
+        # Set up HDFS but do not start it
+        logger.warning('Setting up HDFS without starting it. This may take a while.')
+        should_run = False
+    else:
+        raise ValueError(f"HDFS status '{status}' is not valid.")
+
+    set_after_scenario_fn(context, HdfsTestUtils.cleanup_hdfs)
+    HdfsTestUtils.setup_hdfs(should_run)
+
+
+@given('HDFS has "{event_logs}" eventlogs')
+def step_hdfs_has_eventlogs(context, event_logs) -> None:
+    event_logs_list = E2ETestUtils.resolve_event_logs(event_logs.split(","))
+    hdfs_event_logs_dir = '/'
+    for event_log in event_logs_list:
+        hdfs_copy_cmd = ['hdfs', 'dfs', '-copyFromLocal', '-f', event_log, hdfs_event_logs_dir]
+        cmd_result = E2ETestUtils.run_sys_cmd(hdfs_copy_cmd)
+        E2ETestUtils.assert_sys_cmd_return_code(cmd_result, exp_return_code=0,
+                                                error_msg="Failed to copy event logs to HDFS")
+
+
+@when('spark-rapids tool is executed with "{event_logs}" eventlogs')
+def step_execute_spark_rapids_tool(context, event_logs) -> None:
+    event_logs_list = E2ETestUtils.resolve_event_logs(event_logs.split(","))
+    if hasattr(context, 'platform'):
+        cmd = E2ETestUtils.create_spark_rapids_cmd(event_logs_list, context.temp_dir, context.platform)
+    else:
+        cmd = E2ETestUtils.create_spark_rapids_cmd(event_logs_list, context.temp_dir)
+    context.result = E2ETestUtils.run_sys_cmd(cmd)
+
+
+@then('stderr contains the following')
+def step_verify_stderr(context) -> None:
+    expected_stderr_list = context.text.strip().split(";")
+    for stderr_line in expected_stderr_list:
+        assert stderr_line in context.result.stderr, \
+            (f"Expected stderr line '{stderr_line}' not found\n" +
+             E2ETestUtils.get_cmd_output_str(context.result))
+
+
+@then('stdout contains the following')
+def step_verify_stdout(context) -> None:
+    expected_stdout_list = context.text.strip().split(";")
+    for stdout_line in expected_stdout_list:
+        assert stdout_line in context.result.stdout, \
+            (f"Expected stdout line '{stdout_line}' not found\n" +
+             E2ETestUtils.get_cmd_output_str(context.result))
+
+
+@then('processed applications is "{expected_num_apps}"')
+def step_verify_num_apps(context, expected_num_apps) -> None:
+    actual_num_apps = -1
+    for stdout_line in context.result.stdout.splitlines():
+        if "Processed applications" in stdout_line:
+            actual_num_apps = int(stdout_line.split()[-1])
+    assert actual_num_apps == int(expected_num_apps), \
+        f"Expected: {expected_num_apps}, Actual: {actual_num_apps}"
+
+
+@then('return code is "{return_code:d}"')
+def step_verify_return_code(context, return_code) -> None:
+    assert context.result.returncode == return_code, \
+        f"Expected return code: {return_code}, Actual return code: {context.result.returncode}"
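The steps above share one cleanup convention: any step that mutates global state
registers an undo callback through `set_after_scenario_fn()`, which
`environment.after_scenario()` invokes once the scenario finishes. A hypothetical new
step following that convention (the step text and variable handling below are invented
for illustration and assume the imports of `test_steps.py`):

```python
@given('"{var}" environment variable is set to "{value}"')
def step_set_env_var(context, var, value) -> None:
    original = os.environ.get(var)
    os.environ[var] = value

    def after_scenario_fn():
        # Restore the variable to its pre-scenario state.
        if original is None:
            os.environ.pop(var, None)
        else:
            os.environ[var] = original

    set_after_scenario_fn(context, after_scenario_fn)
```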
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd
new file mode 100644
index 000000000..31ff77c93
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/gpu_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd
new file mode 100644
index 000000000..27f62c180
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/incorrect_app_status_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd
new file mode 100644
index 000000000..8cfba577a
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/join_agg_on_yarn_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd
new file mode 100644
index 000000000..2ff810859
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/photon_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd
new file mode 100644
index 000000000..52470979e
Binary files /dev/null and b/user_tools/tests/spark_rapids_tools_e2e/resources/event_logs/streaming_eventlog.zstd differ
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh
new file mode 100755
index 000000000..c92145ea1
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/common.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+readonly E2E_TEST_HDFS_DIR="${E2E_TEST_TMP_DIR}/hadoop"
+export E2E_TEST_HDFS_DIR E2E_TEST_TMP_DIR
+export LC_ALL=C
+
+err() {
+  echo "ERROR: $1" >&2
+  exit 1
+}
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
new file mode 100755
index 000000000..f2b0476ef
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/cleanup_hdfs.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script stops any running HDFS services and removes the HDFS directories.
+# Usage: ./cleanup_hdfs.sh
+
+readonly CURRENT_FILE_PATH=$(realpath "${0}")
+
+load_common_scripts() {
+  local scripts_dir=$(dirname "$(dirname "${CURRENT_FILE_PATH}")")
+  source "${scripts_dir}/common.sh"
+}
+
+# Stop HDFS services
+stop_hdfs_services() {
+  if jps | grep -q "NameNode\|DataNode"; then
+    echo "Stopping HDFS..."
+    local hadoop_home="${E2E_TEST_HDFS_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}"
+    local hdfs_bin="${hadoop_home}/bin/hdfs"
+    [ ! -f "${hdfs_bin}" ] && err "HDFS binary not found at ${hdfs_bin}. However, HDFS services are running."
+    ${hdfs_bin} --daemon stop namenode
+    ${hdfs_bin} --daemon stop datanode
+  else
+    echo "HDFS is not running."
+  fi
+}
+
+cleanup_hdfs_dir() {
+  rm -rf "${E2E_TEST_HDFS_DIR}"
+  echo "Removed HDFS directories."
+}
+
+main() {
+  load_common_scripts
+  stop_hdfs_services
+  cleanup_hdfs_dir
+}
+
+main
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh
new file mode 100755
index 000000000..10e979bf9
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/setup_hdfs.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script sets up and configures Hadoop HDFS. It optionally starts HDFS services
+# based on the provided HDFS_SHOULD_RUN flag.
+#
+# HDFS Configuration:
+#  - Replication factor: 1
+#  - Disk Space Quota: 2GB
+#  - Temp Directory: /tmp/spark_rapids_tools_e2e_tests
+#
+# Usage: ./setup_hdfs.sh --run|--no-run
+# Options:
+#   --run     Run HDFS services (default)
+#   --no-run  Do not run HDFS
+
+set -e
+
+usage() {
+  echo "Usage: $0 --run|--no-run" >&2
+  echo "Options:" >&2
+  echo "  --run     Run HDFS services (default)" >&2
+  echo "  --no-run  Do not run HDFS" >&2
+  exit 1
+}
+
+if [ $# -eq 0 ]; then
+  usage
+fi
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --run)
+      readonly HDFS_SHOULD_RUN=true
+      shift
+      ;;
+    --no-run)
+      readonly HDFS_SHOULD_RUN=false
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+echo "HDFS_SHOULD_RUN: ${HDFS_SHOULD_RUN}"
+readonly DEFAULT_CORE_SITE_XML="core-site.xml"
+readonly DEFAULT_HDFS_SITE_XML="hdfs-site.xml"
+readonly HDFS_SPACE_QUOTA="2g"
+readonly CURRENT_FILE_PATH=$(realpath "${0}")
+readonly HDFS_SCRIPTS_DIR=$(dirname "${CURRENT_FILE_PATH}")
+readonly VERIFY_HDFS_SERVICES_MAX_RETRY=3
+readonly VERIFY_HDFS_SERVICES_SLEEP_SEC=5
+
+load_common_scripts() {
+  local scripts_dir=$(dirname "${HDFS_SCRIPTS_DIR}")
+  source "${scripts_dir}/common.sh"
+}
+
+# Validate environment variables and dependencies
+validate_env() {
+  [ -z "${JAVA_HOME}" ] && err "JAVA_HOME is not set. Please set JAVA_HOME."
+  [ -z "${E2E_TEST_HADOOP_VERSION}" ] && err "E2E_TEST_HADOOP_VERSION is not set. Please set E2E_TEST_HADOOP_VERSION."
+  [ -z "${E2E_TEST_TMP_DIR}" ] && err "E2E_TEST_TMP_DIR is not set. Please set E2E_TEST_TMP_DIR (e.g. /tmp/spark_rapids_tools_e2e_tests)."
+  command -v jps >/dev/null || err "jps is not available. Please install JDK or add JDK bin directory to PATH."
+}
+
+# Set up HDFS directories
+setup_hdfs_dirs() {
+  echo "Setting up HDFS directories..."
+  readonly E2E_TEST_NAME_NODE_DIR="${E2E_TEST_HDFS_DIR}/namenode"
+  readonly E2E_TEST_DATA_NODE_DIR="${E2E_TEST_HDFS_DIR}/datanode"
+  rm -rf "${E2E_TEST_HDFS_DIR}" "${E2E_TEST_NAME_NODE_DIR}" "${E2E_TEST_DATA_NODE_DIR}"
+  mkdir -p "${E2E_TEST_HDFS_DIR}" "${E2E_TEST_NAME_NODE_DIR}" "${E2E_TEST_DATA_NODE_DIR}"
+  export E2E_TEST_NAME_NODE_DIR E2E_TEST_DATA_NODE_DIR
+}
+
+# Function to verify checksum
+verify_checksum() {
+  echo "Verifying checksum..."
+  if [ $# -ne 2 ]; then
+    err "verify_checksum requires two arguments: file and checksum_file."
+  fi
+  local file="$1"
+  local checksum_file="$2"
+  # Apache publishes checksums in the format: 'SHA512 (<file>) = <digest>',
+  # so the digest is the 4th whitespace-separated field.
+  local expected_checksum=$(awk '{print $4}' "${checksum_file}")
+  local actual_checksum=$(shasum -a 512 "${file}" | awk '{print $1}')
+  [ "${expected_checksum}" != "${actual_checksum}" ] && return 1 || return 0
+}
+
+# Function to download and extract Hadoop
+download_and_extract_hadoop() {
+  echo "Downloading and extracting Hadoop..."
+  local hadoop_url="https://dlcdn.apache.org/hadoop/common/hadoop-${E2E_TEST_HADOOP_VERSION}/hadoop-${E2E_TEST_HADOOP_VERSION}.tar.gz"
+  local hadoop_tar_file="${E2E_TEST_TMP_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}.tar.gz"
+  local checksum_url="${hadoop_url}.sha512"
+  local checksum_file="${hadoop_tar_file}.sha512"
+
+  if [ ! -f "${hadoop_tar_file}" ]; then
+    wget -O "${hadoop_tar_file}" "${hadoop_url}" || err "Failed to download Hadoop tarball."
+  fi
+
+  # Verify checksum and re-download if needed
+  wget -O "${checksum_file}" "${checksum_url}" || err "Failed to download checksum file."
+  if ! verify_checksum "${hadoop_tar_file}" "${checksum_file}"; then
+    wget -O "${hadoop_tar_file}" "${hadoop_url}" || err "Failed to download Hadoop tarball."
+    if ! verify_checksum "${hadoop_tar_file}" "${checksum_file}"; then
+      err "Checksum verification failed after re-downloading. Exiting..."
+    fi
+  fi
+
+  tar -xzf "${hadoop_tar_file}" -C "${E2E_TEST_HDFS_DIR}" || err "Failed to extract Hadoop tarball."
+}
+
+# Configure Hadoop
+configure_hadoop() {
+  echo "Configuring Hadoop..."
+  readonly HADOOP_HOME="${E2E_TEST_HDFS_DIR}/hadoop-${E2E_TEST_HADOOP_VERSION}"
+  readonly HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+  export HADOOP_HOME HADOOP_CONF_DIR
+  export PATH="${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin"
+  envsubst < "${HDFS_SCRIPTS_DIR}/templates/${DEFAULT_CORE_SITE_XML}" > "${HADOOP_HOME}/etc/hadoop/core-site.xml"
+  envsubst < "${HDFS_SCRIPTS_DIR}/templates/${DEFAULT_HDFS_SITE_XML}" > "${HADOOP_HOME}/etc/hadoop/hdfs-site.xml"
+}
+
+# Format the Namenode
+format_namenode() {
+  echo "Formatting the Namenode..."
+  yes | hdfs namenode -format || err "Failed to format Namenode."
+}
+
+# Start HDFS services
+start_hdfs_services() {
+  echo "Starting HDFS services..."
+  hdfs --daemon start namenode
+  hdfs --daemon start datanode
+}
+
+# Verify that HDFS services are running
+verify_hdfs_services() {
+  echo "Verifying HDFS services..."
+  jps | grep -q "NameNode" || err "Namenode is not running."
+  jps | grep -q "DataNode" || err "Datanode is not running."
+  hdfs dfs -ls / || err "Failed to list HDFS root directory."
+  hdfs dfsadmin -setSpaceQuota "${HDFS_SPACE_QUOTA}" / || err "Failed to set space quota of ${HDFS_SPACE_QUOTA}"
+  hdfs dfsadmin -report || err "Failed to get HDFS report."
+}
+
+verify_hdfs_services_with_retry() {
+  local max_retry=$1
+  local count=1
+  while [[ ${count} -le ${max_retry} ]]; do
+    echo "Attempt ${count} of ${max_retry}..."
+    if verify_hdfs_services; then
+      echo "HDFS services are running."
+      return 0
+    fi
+    echo "HDFS services verification failed. Retrying in ${VERIFY_HDFS_SERVICES_SLEEP_SEC} seconds..."
+    sleep ${VERIFY_HDFS_SERVICES_SLEEP_SEC}
+    ((count++))
+  done
+  return 1
+}
+
+main() {
+  load_common_scripts
+  validate_env
+  setup_hdfs_dirs
+  download_and_extract_hadoop
+  configure_hadoop
+  if [ "${HDFS_SHOULD_RUN}" = true ]; then
+    format_namenode
+    start_hdfs_services
+    verify_hdfs_services_with_retry ${VERIFY_HDFS_SERVICES_MAX_RETRY} || err "Failed to start HDFS services after ${VERIFY_HDFS_SERVICES_MAX_RETRY} attempts."
+  fi
+  echo "${HADOOP_HOME}"
+}
+
+main
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml
new file mode 100644
index 000000000..04958b67e
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/core-site.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (c) 2024, NVIDIA CORPORATION.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://localhost:9000</value>
+  </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>${E2E_TEST_DATA_NODE_DIR}</value>
+  </property>
+</configuration>
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml
new file mode 100644
index 000000000..d68093caa
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/hdfs/templates/hdfs-site.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (c) 2024, NVIDIA CORPORATION.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>file:${E2E_TEST_NAME_NODE_DIR}</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>file:${E2E_TEST_DATA_NODE_DIR}</value>
+  </property>
+</configuration>
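The two templates above are rendered by `envsubst` in `configure_hadoop()`, which
replaces the `${E2E_TEST_*}` references with values exported by the setup scripts.
A rough Python analogue of that rendering step (file paths illustrative):

```python
import os
from string import Template

with open('templates/hdfs-site.xml', encoding='utf-8') as src:
    # string.Template uses the same ${VAR} syntax as envsubst.
    rendered = Template(src.read()).substitute(os.environ)
with open('hdfs-site.xml', 'w', encoding='utf-8') as dst:
    dst.write(rendered)
```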
diff --git a/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh
new file mode 100755
index 000000000..c8c523213
--- /dev/null
+++ b/user_tools/tests/spark_rapids_tools_e2e/resources/scripts/setup_env.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+err () {
+  echo "ERROR: $1" >&2
+  exit 1
+}
+
+if [ -z "$E2E_TEST_TOOLS_DIR" ]; then
+  err "Please set E2E_TEST_TOOLS_DIR to the root directory of the spark-rapids-tools repository. Exiting script."
+fi
+
+if [ -z "$E2E_TEST_SPARK_BUILD_VERSION" ]; then
+  err "Please set E2E_TEST_SPARK_BUILD_VERSION to the version of Spark used for building Tools JAR. Exiting script."
+fi
+
+if [ -z "$E2E_TEST_HADOOP_VERSION" ]; then
+  err "Please set E2E_TEST_HADOOP_VERSION to the version of Hadoop used for building Tools JAR. Exiting script."
+fi
+
+build_jar() {
+  local jar_tools_dir="$E2E_TEST_TOOLS_DIR/core"
+  echo "Building Spark RAPIDS Tools JAR file"
+  pushd "$jar_tools_dir"
+  mvn package -DskipTests -Dbuildver="$E2E_TEST_SPARK_BUILD_VERSION" -Dhadoop.version="$E2E_TEST_HADOOP_VERSION"
+  popd
+}
+
+install_python_package() {
+  if [ -z "$E2E_TEST_VENV_DIR" ]; then
+    err "Please set E2E_TEST_VENV_DIR to the name of the virtual environment. Exiting script."
+  fi
+
+  echo "Setting up Python environment in $E2E_TEST_VENV_DIR"
+  local python_tools_dir="$E2E_TEST_TOOLS_DIR/user_tools"
+  python -m venv "$E2E_TEST_VENV_DIR"
+  source "$E2E_TEST_VENV_DIR"/bin/activate
+
+  echo "Installing Spark RAPIDS Tools Python package"
+  pushd "$python_tools_dir"
+  pip install --upgrade pip setuptools wheel
+  pip install .
+  popd
+}
+
+# Check if the Tools JAR file exists or if the user wants to build it
+if [ ! -f "$E2E_TEST_TOOLS_JAR_PATH" ] || [ "$E2E_TEST_BUILD_JAR" = "true" ]; then
+  build_jar
+fi
+
+install_python_package
diff --git a/user_tools/tox.ini b/user_tools/tox.ini
index 835cfb999..b7a766d98 100644
--- a/user_tools/tox.ini
+++ b/user_tools/tox.ini
@@ -9,20 +9,22 @@ envlist =
     coverage
     pylint
     flake8
+    behave
 isolated_build = True
 
 [gh-actions]
 python =
-    3.8: python3.8, pylint, flake8
-    3.9: python3.9, pylint, flake8
-    3.10: python3.10, pylint, flake8
-    3.11: python3.11, pylint, flake8
+    3.8: python3.8, pylint, flake8, behave
+    3.9: python3.9, pylint, flake8, behave
+    3.10: python3.10, pylint, flake8, behave
+    3.11: python3.11, pylint, flake8, behave
 
 [testenv]
 deps =
     pytest
     pytest-cov
    cli_test_helpers
+    behave
 setenv =
    COVERAGE_FILE = {env:COVERAGE_FILE:{toxworkdir}/.coverage.{envname}}
 commands =
@@ -63,3 +65,23 @@ commands = flake8 \
     extend-ignore = E501,
     exclude = .tox,build,dist
+
+[testenv:behave]
+deps = behave
+passenv = JAVA_HOME
+commands = behave {posargs}
+
+[behave]
+paths = tests/spark_rapids_tools_e2e/features
+stderr_capture = false
+
+[behave.userdata]
+# Default maven arguments for building the Tools JAR
+buildver = 350
+hadoop.version = 3.3.6
+# Default arguments for the behave tests
+scala_version = 2.12
+venv_name = spark_rapids_tools_e2e_tests_venv
+setup_script_file = setup_env.sh
+build_jar = true
+e2e_tests_tmp_dir = /tmp/spark_rapids_tools_e2e_tests