diff --git a/soda-core/src/soda_core/cli/soda.py b/soda-core/src/soda_core/cli/soda.py index cd6152972..d1e2eaa55 100644 --- a/soda-core/src/soda_core/cli/soda.py +++ b/soda-core/src/soda_core/cli/soda.py @@ -1,9 +1,9 @@ from __future__ import annotations -import argparse import logging import sys import traceback +from argparse import ArgumentParser from os.path import dirname, exists from pathlib import Path from textwrap import dedent @@ -15,270 +15,290 @@ ContractVerificationResult -def configure_logging(verbose: bool): - sys.stderr = sys.stdout - logging.getLogger("urllib3").setLevel(logging.WARNING) - logging.getLogger("botocore").setLevel(logging.WARNING) - logging.getLogger("pyathena").setLevel(logging.WARNING) - logging.getLogger("faker").setLevel(logging.ERROR) - logging.getLogger("snowflake").setLevel(logging.WARNING) - logging.getLogger("matplotlib").setLevel(logging.WARNING) - logging.getLogger("pyspark").setLevel(logging.ERROR) - logging.getLogger("pyhive").setLevel(logging.ERROR) - logging.getLogger("py4j").setLevel(logging.INFO) - logging.getLogger("segment").setLevel(logging.WARNING) - - default_logging_level = logging.DEBUG if verbose else logging.INFO - logging.basicConfig( - level=default_logging_level, - force=True, # Override any previously set handlers. 
- # https://docs.python.org/3/library/logging.html#logrecord-attributes - # %(name)s - format="%(message)s", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - -def verify_contract( - contract_file_paths: list[str] | None, - data_source_file_path: str | None, - soda_cloud_file_path: str | None, - skip_publish: bool, - use_agent: bool -): - contract_verification_builder: ContractVerificationBuilder = ContractVerification.builder() - - for contract_file_path in contract_file_paths: - contract_verification_builder.with_contract_yaml_file(contract_file_path) - - if data_source_file_path: - contract_verification_builder.with_data_source_yaml_file(data_source_file_path) - - if use_agent: - contract_verification_builder.with_execution_on_soda_agent() - - if soda_cloud_file_path: - contract_verification_builder.with_soda_cloud_yaml_file(soda_cloud_file_path) - - if skip_publish: - contract_verification_builder.with_soda_cloud_skip_publish() - - contract_verification_result: ContractVerificationResult = contract_verification_builder.execute() - if contract_verification_result.has_failures(): - exit(2) - elif contract_verification_result.has_errors(): - exit(3) - - -def publish_contract(contract_file_paths: list[str] | None): - print( - f"Publishing contracts {contract_file_paths}" - ) - - -def create_data_source(data_source_file_path: str, data_source_type: str): - print(f"Creating {data_source_type} data source YAML file '{data_source_file_path}'") - if exists(data_source_file_path): - print(f"Could not create data source file '{data_source_file_path}'. 
" - f"File already exists {Emoticons.POLICE_CAR_LIGHT}.") - return - if data_source_type != "postgres": - print(f"{Emoticons.POLICE_CAR_LIGHT} Only type postgres is supported atm") - return - dir: str = dirname(data_source_file_path) - Path(dir).mkdir(parents=True, exist_ok=True) - with open(data_source_file_path, "w") as text_file: - text_file.write(dedent( - """ - type: postgres - name: postgres_ds - connection: - host: localhost - user: ${POSTGRES_USERNAME} - password: ${POSTGRES_PASSWORD} - database: your_postgres_db - format_regexes: - # Example named regex format - single_digit_test_format: ^[0-9]$ - """ - ).strip()) - print(f"{Emoticons.WHITE_CHECK_MARK} Created data source file '{data_source_file_path}'") - - -# name has underscore otherwise pycharm thinks this is a unit test file -def _test_data_source(data_source_file_path: str): - print(f"Testing data source configuration file {data_source_file_path}") - from soda_core.common.data_source import DataSource - data_source: DataSource = DataSource.from_file(data_source_file_path) - error_message: Optional[str] = data_source.test_connection_error_message() - if error_message: - print(f"Could not connect {Emoticons.POLICE_CAR_LIGHT} using data source '{data_source_file_path}': " - f"{error_message}") - exit(2) - else: - print(f"Success! Connection in '{data_source_file_path}' tested ok. {Emoticons.WHITE_CHECK_MARK}") - - -def create_soda_cloud(soda_cloud_file_path: str): - print(f"Creating Soda Cloud YAML file '{soda_cloud_file_path}'") - if exists(soda_cloud_file_path): - print(f"Could not create soda cloud file '{soda_cloud_file_path}'. 
" - f"File already exists {Emoticons.POLICE_CAR_LIGHT}") - dir: str = dirname(soda_cloud_file_path) - Path(dir).mkdir(parents=True, exist_ok=True) - with open(soda_cloud_file_path, "w") as text_file: - text_file.write(dedent( - """ - soda_cloud: - host: cloud.soda.io - api_key_id: ${SODA_CLOUD_API_KEY_ID} - api_key_secret: ${SODA_CLOUD_API_KEY_SECRET} - """ - ).strip()) - print(f"{Emoticons.WHITE_CHECK_MARK} Created Soda Cloud configuration file '{soda_cloud_file_path}'") - - -# name has underscore otherwise pycharm thinks this is a unit test file -def _test_soda_cloud(soda_cloud_file_path: str): - from soda_core.common.soda_cloud import SodaCloud - print(f"Testing soda cloud file {soda_cloud_file_path}") - soda_cloud_yaml_source: YamlSource = YamlSource.from_file_path(soda_cloud_file_path) - soda_cloud_file_content: YamlFileContent = soda_cloud_yaml_source.parse_yaml_file_content( - file_type="soda_cloud", variables={}, logs=Logs() - ) - soda_cloud: SodaCloud = SodaCloud.from_file(soda_cloud_file_content) - error_msg = soda_cloud.test_connection() - if error_msg: - print(f"{Emoticons.POLICE_CAR_LIGHT} Could not connect to Soda Cloud: {error_msg}") - exit(3) - else: - print(f"{Emoticons.WHITE_CHECK_MARK} Success! Tested Soda Cloud credentials in '{soda_cloud_file_path}'") - - -def main(): - try: - print(dedent(""" - __| _ \| \ \\ - \__ \ ( | | _ \\ - ____/\___/___/_/ _\\ CLI 4.0.0.dev?? - """).strip("\n")) - - cli_parser = argparse.ArgumentParser(epilog="Run 'soda {command} -h' for help on a particular soda command") - - sub_parsers = cli_parser.add_subparsers(dest="command", help='Soda command description') - verify_parser = sub_parsers.add_parser('verify', help='Verify a contract') - - verify_parser.add_argument( - "-c", "--contract", - type=str, - nargs='+', - help="One or more contract file paths." - ) - verify_parser.add_argument( - "-ds", "--data-source", - type=str, - help="The data source configuration file." 
- ) - verify_parser.add_argument( - "-sc", "--soda-cloud", - type=str, - help="A Soda Cloud configuration file path." - ) - verify_parser.add_argument( - "-a", "--use-agent", - const=True, - action='store_const', - default=False, - help="Executes contract verification on Soda Agent instead of locally in this library." - ) - verify_parser.add_argument( - "-sp", "--skip-publish", - const=True, - action='store_const', - default=False, - help="Skips publishing of the contract when sending results to Soda Cloud. Precondition: The contract version " - "must already exist on Soda Cloud." - ) - verify_parser.add_argument( - "-v", "--verbose", - const=True, - action='store_const', - default=False, - help="Show more detailed logs on the console." +class CLI: + + def execute(self) -> None: + try: + print(dedent(""" + __| _ \| \ \\ + \__ \ ( | | _ \\ + ____/\___/___/_/ _\\ CLI 4.0.0.dev?? + """).strip("\n")) + + cli_parser = self._create_argument_parser("Run 'soda {command} -h' for help on a particular soda command") + + sub_parsers = cli_parser.add_subparsers(dest="command", help='Soda command description') + verify_parser = sub_parsers.add_parser('verify', help='Verify a contract') + + verify_parser.add_argument( + "-c", "--contract", + type=str, + nargs='+', + help="One or more contract file paths." + ) + verify_parser.add_argument( + "-ds", "--data-source", + type=str, + help="The data source configuration file." + ) + verify_parser.add_argument( + "-sc", "--soda-cloud", + type=str, + help="A Soda Cloud configuration file path." + ) + verify_parser.add_argument( + "-a", "--use-agent", + const=True, + action='store_const', + default=False, + help="Executes contract verification on Soda Agent instead of locally in this library." + ) + verify_parser.add_argument( + "-sp", "--skip-publish", + const=True, + action='store_const', + default=False, + help="Skips publishing of the contract when sending results to Soda Cloud. 
Precondition: The contract version " + "must already exist on Soda Cloud." + ) + verify_parser.add_argument( + "-v", "--verbose", + const=True, + action='store_const', + default=False, + help="Show more detailed logs on the console." + ) + + publish_parser = sub_parsers.add_parser('publish', help='Publish a contract (not yet implemented)') + publish_parser.add_argument( + "-c", "--contract", + type=str, + nargs='+', + help="One or more contract file paths." + ) + + create_data_source_parser = sub_parsers.add_parser( + name="create-data-source", + help="Create a data source YAML configuration file" + ) + create_data_source_parser.add_argument( + "-f", "--file", + type=str, + help="The path to the file to be created. (directories will be created if needed)" + ) + create_data_source_parser.add_argument( + "-t", "--type", + type=str, + default="postgres", + help="Type of the data source. Eg postgres" + ) + + test_parser = sub_parsers.add_parser('test-data-source', help='Test a data source connection') + test_parser.add_argument( + "-ds", "--data-source", + type=str, + help="The data source configuration file path to test." + ) + + create_soda_cloud_parser = sub_parsers.add_parser( + name="create-soda-cloud", + help="Create a Soda Cloud YAML configuration file" + ) + create_soda_cloud_parser.add_argument( + "-f", "--file", + type=str, + help="The path to the file to be created. (directories will be created if needed)" + ) + + test_parser = sub_parsers.add_parser('test-soda-cloud', help='Test the Soda Cloud connection') + test_parser.add_argument( + "-sc", "--soda-cloud", + type=str, + help="A Soda Cloud configuration file path." 
+ ) + + args = cli_parser.parse_args() + + verbose = args.verbose if hasattr(args, "verbose") else False + self._configure_logging(verbose) + + if args.command == "verify": + self._verify_contract( + args.contract, args.data_source, args.soda_cloud, args.skip_publish, args.use_agent + ) + elif args.command == "publish": + self._publish_contract(args.contract) + elif args.command == "create-data-source": + self._create_data_source(args.file, args.type) + elif args.command == "test-data-source": + self._test_data_source(args.data_source) + elif args.command == "create-soda-cloud": + self._create_soda_cloud(args.file) + elif args.command == "test-soda-cloud": + self._test_soda_cloud(args.soda_cloud) + else: + cli_parser.print_help() + + except Exception as e: + traceback.print_exc() + self._end_with_exit_code(3) + + def _configure_logging(self, verbose: bool): + sys.stderr = sys.stdout + logging.getLogger("urllib3").setLevel(logging.WARNING) + logging.getLogger("botocore").setLevel(logging.WARNING) + logging.getLogger("pyathena").setLevel(logging.WARNING) + logging.getLogger("faker").setLevel(logging.ERROR) + logging.getLogger("snowflake").setLevel(logging.WARNING) + logging.getLogger("matplotlib").setLevel(logging.WARNING) + logging.getLogger("pyspark").setLevel(logging.ERROR) + logging.getLogger("pyhive").setLevel(logging.ERROR) + logging.getLogger("py4j").setLevel(logging.INFO) + logging.getLogger("segment").setLevel(logging.WARNING) + + default_logging_level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=default_logging_level, + force=True, # Override any previously set handlers. 
+ # https://docs.python.org/3/library/logging.html#logrecord-attributes + # %(name)s + format="%(message)s", + handlers=[logging.StreamHandler(sys.stdout)], ) - publish_parser = sub_parsers.add_parser('publish', help='Publish a contract (not yet implemented)') - publish_parser.add_argument( - "-c", "--contract", - type=str, - nargs='+', - help="One or more contract file paths." - ) - - create_data_source_parser = sub_parsers.add_parser( - name="create-data-source", - help="Create a data source YAML configuration file" - ) - create_data_source_parser.add_argument( - "-f", "--file", - type=str, - help="The path to the file to be created. (directories will be created if needed)" - ) - create_data_source_parser.add_argument( - "-t", "--type", - type=str, - default="postgres", - help="Type of the data source. Eg postgres" + def _verify_contract( + self, + contract_file_paths: list[str] | None, + data_source_file_path: str | None, + soda_cloud_file_path: str | None, + skip_publish: bool, + use_agent: bool + ): + contract_verification_builder: ContractVerificationBuilder = ContractVerification.builder() + + for contract_file_path in contract_file_paths: + contract_verification_builder.with_contract_yaml_file(contract_file_path) + + if data_source_file_path: + contract_verification_builder.with_data_source_yaml_file(data_source_file_path) + + if use_agent: + contract_verification_builder.with_execution_on_soda_agent() + + if soda_cloud_file_path: + contract_verification_builder.with_soda_cloud_yaml_file(soda_cloud_file_path) + + if skip_publish: + contract_verification_builder.with_soda_cloud_skip_publish() + + contract_verification_result: ContractVerificationResult = contract_verification_builder.execute() + if contract_verification_result.has_failures(): + self._end_with_exit_code(2) + elif contract_verification_result.has_errors(): + self._end_with_exit_code(3) + + return contract_verification_result + + def _publish_contract( + self, + contract_file_paths: list[str] | 
None + ): + print( + f"Publishing contracts {contract_file_paths}" ) - test_parser = sub_parsers.add_parser('test-data-source', help='Test a data source connection') - test_parser.add_argument( - "-ds", "--data-source", - type=str, - help="The name of a configured data source to test." - ) - - create_soda_cloud_parser = sub_parsers.add_parser( - name="create-soda-cloud", - help="Create a Soda Cloud YAML configuration file" - ) - create_soda_cloud_parser.add_argument( - "-f", "--file", - type=str, - help="The path to the file to be created. (directories will be created if needed)" - ) - - test_parser = sub_parsers.add_parser('test-soda-cloud', help='Test the Soda Cloud connection') - test_parser.add_argument( - "-sc", "--soda-cloud", - type=str, - help="A Soda Cloud configuration file path." + def _create_data_source( + self, + data_source_file_path: str, + data_source_type: str + ): + print(f"Creating {data_source_type} data source YAML file '{data_source_file_path}'") + if exists(data_source_file_path): + print(f"Could not create data source file '{data_source_file_path}'. 
" + f"File already exists {Emoticons.POLICE_CAR_LIGHT}.") + return + if data_source_type != "postgres": + print(f"{Emoticons.POLICE_CAR_LIGHT} Only type postgres is supported atm") + return + dir: str = dirname(data_source_file_path) + Path(dir).mkdir(parents=True, exist_ok=True) + with open(data_source_file_path, "w") as text_file: + text_file.write(dedent( + """ + type: postgres + name: postgres_ds + connection: + host: localhost + user: ${POSTGRES_USERNAME} + password: ${POSTGRES_PASSWORD} + database: your_postgres_db + format_regexes: + # Example named regex format + single_digit_test_format: ^[0-9]$ + """ + ).strip()) + print(f"{Emoticons.WHITE_CHECK_MARK} Created data source file '{data_source_file_path}'") + + def _test_data_source( + self, + data_source_file_path: str + ): + print(f"Testing data source configuration file {data_source_file_path}") + from soda_core.common.data_source import DataSource + data_source: DataSource = DataSource.from_file(data_source_file_path) + error_message: Optional[str] = data_source.test_connection_error_message() + if error_message: + print(f"Could not connect {Emoticons.POLICE_CAR_LIGHT} using data source '{data_source_file_path}': " + f"{error_message}") + self._end_with_exit_code(2) + else: + print(f"Success! Connection in '{data_source_file_path}' tested ok. {Emoticons.WHITE_CHECK_MARK}") + + def _create_soda_cloud( + self, + soda_cloud_file_path: str + ): + print(f"Creating Soda Cloud YAML file '{soda_cloud_file_path}'") + if exists(soda_cloud_file_path): + print(f"Could not create soda cloud file '{soda_cloud_file_path}'. 
" + f"File already exists {Emoticons.POLICE_CAR_LIGHT}") + return + Path(dirname(soda_cloud_file_path)).mkdir(parents=True, exist_ok=True) + with open(soda_cloud_file_path, "w") as text_file: + text_file.write(dedent( + """ + soda_cloud: + host: cloud.soda.io + api_key_id: ${SODA_CLOUD_API_KEY_ID} + api_key_secret: ${SODA_CLOUD_API_KEY_SECRET} + """ + ).strip()) + print(f"{Emoticons.WHITE_CHECK_MARK} Created Soda Cloud configuration file '{soda_cloud_file_path}'") + + def _test_soda_cloud( + self, + soda_cloud_file_path: str + ): + from soda_core.common.soda_cloud import SodaCloud + print(f"Testing soda cloud file {soda_cloud_file_path}") + soda_cloud_yaml_source: YamlSource = YamlSource.from_file_path(soda_cloud_file_path) + soda_cloud_file_content: YamlFileContent = soda_cloud_yaml_source.parse_yaml_file_content( + file_type="soda_cloud", variables={}, logs=Logs() ) - - args = cli_parser.parse_args() - - verbose = args.verbose if hasattr(args, "verbose") else False - configure_logging(verbose) - - if args.command == "verify": - verify_contract(args.contract, args.data_source, args.soda_cloud, args.skip_publish, args.use_agent) - elif args.command == "publish": - publish_contract(args.contract) - elif args.command == "create-data-source": - create_data_source(args.file, args.type) - elif args.command == "test-data-source": - _test_data_source(args.data_source) - elif args.command == "create-soda-cloud": - create_soda_cloud(args.file) - elif args.command == "test-soda-cloud": - _test_soda_cloud(args.soda_cloud) + soda_cloud: SodaCloud = SodaCloud.from_file(soda_cloud_file_content) + error_msg = soda_cloud.test_connection() + if error_msg: + print(f"{Emoticons.POLICE_CAR_LIGHT} Could not connect to Soda Cloud: {error_msg}") + self._end_with_exit_code(3) else: - cli_parser.print_help() + print(f"{Emoticons.WHITE_CHECK_MARK} Success! 
Tested Soda Cloud credentials in '{soda_cloud_file_path}'") + + def _end_with_exit_code(self, exit_code: int): + sys.exit(exit_code) - except Exception as e: - traceback.print_exc() - exit(3) + def _create_argument_parser(self, epilog: str) -> ArgumentParser: + return ArgumentParser(epilog=epilog) if __name__ == "__main__": - main() + CLI().execute() diff --git a/soda-core/src/soda_core/contracts/impl/contract_yaml.py b/soda-core/src/soda_core/contracts/impl/contract_yaml.py index e2c654e0f..73c0c8c0c 100644 --- a/soda-core/src/soda_core/contracts/impl/contract_yaml.py +++ b/soda-core/src/soda_core/contracts/impl/contract_yaml.py @@ -143,8 +143,9 @@ def _parse_checks( elif isinstance(check_yaml_object, str): check_type_name = check_yaml_object self.logs.info( - f"{Emoticons.PINCHED_FINGERS} I'll be forgiving and ignore that you forgot the " - f"colon ':' behind the check '{check_type_name}'" + f"{Emoticons.PINCHED_FINGERS} Mama Mia! You forgot the " + f"colon ':' behind the check '{check_type_name}'. 
For this once I'll " + f"pretend I didn't see it {Emoticons.SEE_NO_EVIL}" ) if isinstance(check_type_name, str): if check_body_yaml_object is None: diff --git a/soda-core/tests/soda_core/tests/components/manual_cli_test.py b/soda-core/tests/soda_core/tests/components/manual_cli_test.py deleted file mode 100644 index 2b081a214..000000000 --- a/soda-core/tests/soda_core/tests/components/manual_cli_test.py +++ /dev/null @@ -1,14 +0,0 @@ -import sys - -from dotenv import load_dotenv - -from soda_core.cli.soda import main, configure_logging - -if __name__ == "__main__": - configure_logging(verbose=True) - - project_root_dir = __file__[: -len("/soda-core/tests/soda_core/tests/components/manual_cli_test.py")] - load_dotenv(f"{project_root_dir}/.env", override=True) - - sys.argv = ["soda", "verify", "-ds", "/Users/tom/Code/ccli/ds2.yml", "-sc", "/Users/tom/Code/ccli/sc.yml", "-c", "/Users/tom/Code/ccli/c2.yml"] - main() diff --git a/soda-core/tests/soda_core/tests/components/test_cli.py b/soda-core/tests/soda_core/tests/components/test_cli.py new file mode 100644 index 000000000..bb00d579b --- /dev/null +++ b/soda-core/tests/soda_core/tests/components/test_cli.py @@ -0,0 +1,117 @@ +import sys +import tempfile +from argparse import ArgumentParser +from typing import Optional + +from soda_core.cli.soda import CLI +from soda_core.contracts.contract_verification import ContractVerificationResult, ContractResult +from soda_core.tests.helpers.data_source_test_helper import DataSourceTestHelper +from soda_core.tests.helpers.test_functions import dedent_and_strip +from soda_core.tests.helpers.test_table import TestTableSpecification + + +class CLI4Test(CLI): + + def __init__(self, argv: list[str]): + super().__init__() + self.contract_file_paths: Optional[list[str]] = None + self.data_source_file_path: Optional[str] = None + self.soda_cloud_file_path: Optional[str] = None + self.skip_publish: Optional[bool] = None + self.use_agent: Optional[bool] = None + self.exit_code: int = 0 + 
self.argv: list[str] = argv + self.contract_verification_result: Optional[ContractVerificationResult] = None + + def execute(self) -> None: + sys.argv = self.argv + super().execute() + + def _verify_contract( + self, + contract_file_paths: Optional[list[str]], + data_source_file_path: Optional[str], + soda_cloud_file_path: Optional[str], + skip_publish: bool, + use_agent: bool + ): + self.contract_file_paths = contract_file_paths + self.data_source_file_path = data_source_file_path + self.soda_cloud_file_path = soda_cloud_file_path + self.skip_publish = skip_publish + self.use_agent = use_agent + + self.contract_verification_result = super()._verify_contract( + contract_file_paths, data_source_file_path, soda_cloud_file_path, skip_publish, use_agent + ) + + def _configure_logging(self, verbose: bool): + pass + + def _end_with_exit_code(self, exit_code: int) -> None: + self.exit_code = exit_code + + def _create_argument_parser(self, epilog: str) -> ArgumentParser: + return ArgumentParser4Test(epilog=epilog) + + +class ArgumentParser4Test(ArgumentParser): + + def exit(self, status=0, message=None): + print(f"Skipping exit in unit test status={status}, message={message}") + + +test_table_specification = ( + TestTableSpecification.builder() + .table_purpose("cli") + .column_text("id") + .rows(rows=[ + ("1",), + ("2",), + ("3",), + ]) + .build() +) + + +def test_cli(data_source_test_helper: DataSourceTestHelper): + test_table = data_source_test_helper.ensure_test_table(test_table_specification) + + contract_yaml_str: str = dedent_and_strip(f""" + data_source: postgres_test_ds + dataset: {test_table.unique_name} + dataset_prefix: {data_source_test_helper.dataset_prefix} + columns: + - name: id + checks: + - row_count: + threshold: + must_be: 3 + - schema: + """) + + data_source_yaml_str: str = dedent_and_strip(""" + type: postgres + name: postgres_test_ds + connection: + host: localhost + user: soda_test + database: soda_test + format_regexes: + single_digit_test_format: 
^[0-9]$ + """) + + contract_tmp_file = tempfile.NamedTemporaryFile() + with open(contract_tmp_file.name, 'w') as f: + f.write(contract_yaml_str) + + data_source_tmp_file = tempfile.NamedTemporaryFile() + with open(data_source_tmp_file.name, 'w') as f: + f.write(data_source_yaml_str) + + test_cli: CLI4Test = CLI4Test(["soda", "verify", "-ds", data_source_tmp_file.name, "-c", contract_tmp_file.name]) + test_cli.execute() + assert test_cli.exit_code == 0 + assert test_cli.contract_verification_result.is_ok() + contract_result: ContractResult = test_cli.contract_verification_result.contract_results[0] + assert len(contract_result.check_results) == 2