From f72e9b221f27715e0da54f95c15995ee4a253c9f Mon Sep 17 00:00:00 2001 From: Alexander Butler <41213451+z3z1ma@users.noreply.github.com> Date: Tue, 5 Oct 2021 01:10:31 -0700 Subject: [PATCH] feat: Refactor CLI - Simplify Config - Allow Env Vars - Add Config Command (#65) * large refactor to use click as cli interface, improve and further abstract interface complexity, allow env vars implicitly * ephemeral model skip should be debug level like rest * full implementation of yaml config support in tandem to env var support, iteractive config generation and updates * add config load func to utils * add config global, override option process method to include config lookup which lets us reimplement required args * utility function to list env vars, validate in use env vars, inject env vars to render runtime config * last linting and typing updates and more robust type handler for config prompt lists * specify encoding for all file handlers * explicitly specify lru cache size * remove incorrect underscore in kwarg * update interface to inherit from config objects * little comment for clarity * readme updates to programmatic invocation and config * fix typo * fixed same type * remove uneeded sentence * add small note for manual config yml create and insight into layout. 
* rst needs a break between code-block decl and code * add method to use the resolved parser prop and execute read_models directly from interface * reflect dbt interface simplification in readme Co-authored-by: Falador_wiz1 --- README.rst | 111 +++- dbtmetabase/__init__.py | 978 +++++++++++++++++++--------- dbtmetabase/bin/dbt-metabase | 3 +- dbtmetabase/logger/__init__.py | 0 dbtmetabase/logger/logging.py | 76 +++ dbtmetabase/metabase.py | 133 ++-- dbtmetabase/models/config.py | 84 ++- dbtmetabase/models/exceptions.py | 18 + dbtmetabase/models/interface.py | 134 ++++ dbtmetabase/models/metabase.py | 19 +- dbtmetabase/parsers/dbt_folder.py | 189 +++--- dbtmetabase/parsers/dbt_manifest.py | 169 +++-- dbtmetabase/utils.py | 15 + requirements.txt | 2 + tests/__init__.py | 3 +- tests/test_dbt_folder_reader.py | 265 -------- tests/test_dbt_manifest_reader.py | 266 -------- tests/test_dbt_parsers.py | 535 +++++++++++++++ tests/test_metabase.py | 31 +- tests/utils_mb_test_suite.py | 12 +- 20 files changed, 1903 insertions(+), 1140 deletions(-) create mode 100644 dbtmetabase/logger/__init__.py create mode 100644 dbtmetabase/logger/logging.py create mode 100644 dbtmetabase/models/exceptions.py create mode 100644 dbtmetabase/models/interface.py delete mode 100644 tests/test_dbt_folder_reader.py delete mode 100644 tests/test_dbt_manifest_reader.py create mode 100644 tests/test_dbt_parsers.py diff --git a/README.rst b/README.rst index 7d3a95b8..5bf75955 100644 --- a/README.rst +++ b/README.rst @@ -309,6 +309,53 @@ You can control this behavior with two arguments: * ``--metabase_sync_timeout`` - number of seconds to wait and re-check data model before giving up +Configuration +------------- + +.. code-block:: shell + + dbt-metabase config + +Using the above command, you can enter an interactive configuration session where you can cache default selections +for arguments. This creates a ``config.yml`` in ~/.dbt-metabase. 
This is particularly useful for arguments which are repeated on every invocation like metabase_user, metabase_host, +metabase_password, dbt_manifest_path, etc. + +In addition, there are a few injected env vars that make deploying dbt-metabase in a CI/CD environment simpler without exposing +secrets. Listed below are acceptable env vars which correspond to their CLI flags: + +* ``DBT_DATABASE`` +* ``DBT_PATH`` +* ``DBT_MANIFEST_PATH`` +* ``MB_USER`` +* ``MB_PASS`` +* ``MB_HOST`` +* ``MB_DATABASE`` + +If any one of the above is present in the environment, the corresponding CLI flag is not needed unless overriding +the environment value. In the absence of a CLI flag, dbt-metabase will first look to the environment for any +env vars to inject, then we will look to the config.yml for cached defaults. + +A ``config.yml`` can be created or updated manually as well if needed. The only +requirement is that it must be located in ~/.dbt-metabase. The layout is as follows: + +.. code-block:: yaml + + config: + dbt_database: reporting + dbt_manifest_path: /home/user/dbt/target/manifest.json + metabase_database: Reporting + metabase_host: reporting.metabase.io + metabase_user: user@source.co + metabase_password: ... + metabase_use_http: false + metabase_sync: true + metabase_sync_timeout: null + dbt_schema_excludes: + - development + - testing + dbt_excludes: + - test_monday_io_site_diff + Programmatic Invocation ----------------------- @@ -317,23 +364,10 @@ line. But if you prefer to call it from your code, here's how to do it: .. 
code-block:: python - import dbtmetabase + from dbtmetabase.models.interface import MetabaseInterface, DbtInterface - # Collect Args the Build Configs # - ################################## - - metabase_config = MetabaseConfig( - host=metabase_host, - user=metabase_user, - password=metabase_password, - use_http=metabase_use_http, - verify=metabase_verify, - database=metabase_database, - sync_skip=metabase_sync_skip, - sync_timeout=metabase_sync_timeout, - ) - - dbt_config = DbtConfig( + # Instantiate dbt interface + dbt = DbtInterface( path=dbt_path, manifest_path=dbt_manifest_path, database=dbt_database, @@ -343,24 +377,41 @@ line. But if you prefer to call it from your code, here's how to do it: excludes=dbt_excludes, ) - # Propagate models to Metabase # - ################################ + # Load models + dbt_models, aliases = dbt.read_models( + include_tags=dbt_include_tags, + docs_url=dbt_docs_url, + ) - dbtmetabase.models( - metabase_config=metabase_config, - dbt_config=dbt_config, - dbt_docs_url=dbt_docs, - dbt_include_tags=include_tags, + # Instantiate Metabase interface + metabase = MetabaseInterface( + host=metabase_host, + user=metabase_user, + password=metabase_password, + use_http=metabase_use_http, + verify=metabase_verify, + database=metabase_database, + sync=metabase_sync, + sync_timeout=metabase_sync_timeout, ) - # Parse exposures from Metabase into dbt yml # - ############################################## + # Load client + metabase.prepare_metabase_client(dbt_models) + + # Propagate models to Metabase + metabase.client.export_models( + database=metabase.database, + models=dbt_models, + aliases=aliases, + ) - dbtmetabase.exposures( - metabase_config=metabase_config, - dbt_config=dbt_config, - output_path=output_path, - output_name=output_name, + # Parse exposures from Metabase into dbt schema yml + metabase.client.extract_exposures( + models=dbt_models, + output_path=output_path, + output_name=output_name, + 
include_personal_collections=include_personal_collections, + collection_excludes=collection_excludes, ) diff --git a/dbtmetabase/__init__.py b/dbtmetabase/__init__.py index 52ee9ebc..1d749968 100644 --- a/dbtmetabase/__init__.py +++ b/dbtmetabase/__init__.py @@ -1,376 +1,720 @@ import logging -import sys +import functools +from pathlib import Path +from typing import Iterable, Optional, Callable, Any import os -import argparse -from .metabase import MetabaseClient -from .parsers.dbt_folder import DbtFolderReader -from .parsers.dbt_manifest import DbtManifestReader -from .models.config import MetabaseConfig, DbtConfig -from .utils import get_version +import click +import yaml -from typing import Iterable, List, Union, Optional +from .logger import logging as package_logger +from .models.interface import MetabaseInterface, DbtInterface +from .utils import get_version, load_config -__version__ = get_version() - - -def models( - metabase_config: MetabaseConfig, - dbt_config: DbtConfig, - dbt_include_tags: bool = True, - dbt_docs_url: Optional[str] = None, -): - """Exports models from dbt to Metabase. - - Args: - metabase_config (str): Source database name. - dbt_config (str): Target Metabase database name. Database in Metabase is aliased. - dbt_include_tags (bool, optional): Append the dbt tags to the end of the table description. Defaults to True. - dbt_docs_url (str, optional): URL to your dbt docs hosted catalog, a link will be appended to the model description (only works for manifest parsing). Defaults to None. - dbt_includes (Iterable, optional): Model names to limit processing to. Defaults to None. - dbt_excludes (Iterable, optional): Model names to exclude. Defaults to None. 
- """ - # Assertions - if dbt_config.path and dbt_config.manifest_path: - logging.warning("Prioritizing manifest path arg") - dbt_config.path = None - if dbt_config.path and not dbt_config.schema: - logging.error( - "Must supply a schema if using YAML parser, it is used to resolve foreign key relations and which Metabase models to propagate documentation to" - ) - if dbt_config.path: - if dbt_config.database: - logging.info( - "Argument --database %s is unused in dbt_project yml parser. Use manifest parser instead.", - dbt_config.database, - ) - if dbt_docs_url: - logging.info( - "Argument --dbt_docs_url %s is unused in dbt_project yml parser. Use manifest parser instead.", - dbt_docs_url, - ) - - # Instantiate Metabase client - mbc = MetabaseClient( - host=metabase_config.host, - user=metabase_config.user, - password=metabase_config.password, - use_http=metabase_config.use_http, - verify=metabase_config.verify, - ) - reader: Union[DbtFolderReader, DbtManifestReader] - - # Resolve dbt reader being either YAML or manifest.json based - if dbt_config.manifest_path: - reader = DbtManifestReader(os.path.expandvars(dbt_config.manifest_path)) - elif dbt_config.path: - reader = DbtFolderReader(os.path.expandvars(dbt_config.path)) - - if dbt_config.schema_excludes: - dbt_config.schema_excludes = { - _schema.upper() for _schema in dbt_config.schema_excludes - } - - # Process dbt stuff - dbt_models = reader.read_models( - database=dbt_config.database, - schema=dbt_config.schema, - schema_excludes=dbt_config.schema_excludes, - includes=dbt_config.includes, - excludes=dbt_config.excludes, - include_tags=dbt_include_tags, - docs_url=dbt_docs_url, - ) +__version__ = get_version() - # Sync and attempt schema alignment prior to execution; if timeout is not explicitly set, proceed regardless of success - if not metabase_config.sync_skip: - if metabase_config.sync_timeout is not None and not mbc.sync_and_wait( - metabase_config.database, - dbt_models, - metabase_config.sync_timeout, 
+CONFIG = load_config() +ENV_VARS = [ + "DBT_DATABASE", + "DBT_PATH", + "DBT_MANIFEST_PATH", + "MB_USER", + "MB_PASS", + "MB_HOST", + "MB_DATABASE", +] + + +class MultiArg(click.Option): + """This class lets us pass multiple arguments after an options, equivalent to nargs=*""" + + def __init__(self, *args, **kwargs): + nargs = kwargs.pop("nargs", -1) + assert nargs == -1, "nargs, if set, must be -1 not {}".format(nargs) + super(MultiArg, self).__init__(*args, **kwargs) + self._previous_parser_process = None + self._eat_all_parser = None + + def add_to_parser(self, parser, ctx): + def parser_process(value, state): + # Method to hook to the parser.process + done = False + value = [value] + # Grab everything up to the next option + while state.rargs and not done: + for prefix in self._eat_all_parser.prefixes: + if state.rargs[0].startswith(prefix): + done = True + if not done: + value.append(state.rargs.pop(0)) + value = tuple(value) + + # Call the actual process + self._previous_parser_process(value, state) + + super().add_to_parser(parser, ctx) + for name in self.opts: + # pylint: disable=protected-access + our_parser = parser._long_opt.get(name) or parser._short_opt.get(name) + if our_parser: + self._eat_all_parser = our_parser + self._previous_parser_process = our_parser.process + our_parser.process = parser_process + break + + return + + +class ListParam(click.Tuple): + def __init__(self) -> None: + self.type = click.STRING + super().__init__([]) + + def convert( + self, value: Any, param: Optional[click.Parameter], ctx: Optional[click.Context] + ) -> Any: + len_value = len(value) + types = [self.type] * len_value + + return list(ty(x, param, ctx) for ty, x in zip(types, value)) + + +class OptionAcceptableFromConfig(click.Option): + """This class override should be used on arguments that are marked `required=True` in order to give them + more resilence to raising an error when the option exists in the users config""" + + def process_value(self, ctx: 
click.Context, value: Any) -> Any: + if value is not None: + value = self.type_cast_value(ctx, value) + + if self.required and self.value_is_missing(value): + if self.name not in CONFIG: + raise click.core.MissingParameter(ctx=ctx, param=self) + value = CONFIG[self.name] + + if self.callback is not None: + value = self.callback(ctx, self, value) + + return value + + +class CommandController(click.Command): + """This class inherets from click.Command and supplies custom help text renderer to + render our docstrings a little prettier as well as a hook in the invoke to load from a config file if it exists.""" + + def invoke(self, ctx: click.Context): + + if CONFIG: + for param, value in ctx.params.items(): + if value is None and param in CONFIG: + ctx.params[param] = CONFIG[param] + + return super().invoke(ctx) + + def get_help(self, ctx: click.Context): + orig_wrap_test = click.formatting.wrap_text + + def wrap_text( + text: str, + width: int = 78, + initial_indent: str = "", + subsequent_indent: str = "", + preserve_paragraphs: bool = False, ): - logging.critical("Sync timeout reached, models still not compatible") - return + del preserve_paragraphs + return orig_wrap_test( + text.replace("\n", "\n\n"), + width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + preserve_paragraphs=True, + ).replace("\n\n", "\n") - # Process Metabase stuff - mbc.export_models( - database=metabase_config.database, - models=dbt_models, - aliases=reader.catch_aliases, - ) + click.formatting.wrap_text = wrap_text + return super().get_help(ctx) -def exposures( - metabase_config: MetabaseConfig, - dbt_config: DbtConfig, - output_path: str, - output_name: str, - include_personal_collections: bool = False, - collection_excludes: Optional[Iterable] = None, -): - """Extracts and imports exposures from Metabase to dbt. 
+def shared_opts(func: Callable) -> Callable: + """Here we define the options shared across subcommands Args: - metabase_config (str): Source database name. - dbt_config (str): Target Metabase database name. Database in Metabase is aliased. - output_path (str): Append the dbt tags to the end of the table description. Defaults to True. - output_name (str): URL to your dbt docs hosted catalog, a link will be appended to the model description (only works for manifest parsing). Defaults to None. - include_personal_collections (bool, optional): Model names to limit processing to. Defaults to None. - collection_excludes (Iterable, optional): Model names to exclude. Defaults to None. - """ - - # Assertions - if dbt_config.path and dbt_config.manifest_path: - logging.warning("Prioritizing manifest path arg") - dbt_config.path = None - if dbt_config.path and not dbt_config.schema: - logging.error( - "Must supply a schema if using YAML parser, it is used to resolve foreign key relations and which Metabase models to propagate documentation to" - ) - - # Instantiate Metabase client - mbc = MetabaseClient( - host=metabase_config.host, - user=metabase_config.user, - password=metabase_config.password, - use_http=metabase_config.use_http, - verify=metabase_config.verify, - ) - reader: Union[DbtFolderReader, DbtManifestReader] - - # Resolve dbt reader being either YAML or manifest.json based - if dbt_config.manifest_path: - reader = DbtManifestReader(os.path.expandvars(dbt_config.manifest_path)) - elif dbt_config.path: - reader = DbtFolderReader(os.path.expandvars(dbt_config.path)) - - if dbt_config.schema_excludes: - dbt_config.schema_excludes = { - _schema.upper() for _schema in dbt_config.schema_excludes - } - - # Process dbt stuff - dbt_models = reader.read_models( - database=dbt_config.database, - schema=dbt_config.schema, - schema_excludes=dbt_config.schema_excludes, - includes=dbt_config.includes, - excludes=dbt_config.excludes, - ) - - # Sync and attempt schema alignment 
prior to execution; if timeout is not explicitly set, proceed regardless of success - if not metabase_config.sync_skip: - if metabase_config.sync_timeout is not None and not mbc.sync_and_wait( - metabase_config.database, - dbt_models, - metabase_config.sync_timeout, - ): - logging.critical("Sync timeout reached, models still not compatible") - return - - # Process Metabase stuff - mbc.extract_exposures( - models=dbt_models, - output_path=output_path, - output_name=output_name, - include_personal_collections=include_personal_collections, - collection_excludes=collection_excludes, - ) - - -def main(args: List = None): - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO - ) - - parser = argparse.ArgumentParser( - prog="PROG", description="Model synchronization from dbt to Metabase." - ) - - parser.add_argument( - "--version", - action="version", - version=f"%(prog)s {__version__}", - ) - - # Commands - parser.add_argument( - "command", choices=["models", "exposures"], help="Command to execute" - ) + func (Callable): Wraps a subcommand - parser_dbt = parser.add_argument_group("dbt Parser") - parser_metabase = parser.add_argument_group("Metabase Client") - parser_models = parser.add_argument_group("`models` optional arguments") - parser_exposures = parser.add_argument_group("`exposures` optional arguments") + Returns: + Callable: Subcommand with added options + """ - # dbt arguments - parser_dbt.add_argument( + @click.option( "--dbt_database", - metavar="DB", + envvar="DBT_DATABASE", + show_envvar=True, required=True, - help="Target database name as specified in dbt", + cls=OptionAcceptableFromConfig, + help="Target database name as specified in dbt models to be actioned", + type=click.STRING, ) - group = parser_dbt.add_mutually_exclusive_group() - group.add_argument( + @click.option( "--dbt_path", - help="Path to dbt project. 
Cannot be specified with --dbt_manifest_path", + envvar="DBT_PATH", + show_envvar=True, + help="Path to dbt project. If specified with --dbt_manifest_path, then the manifest is prioritized", + type=click.Path(exists=True, file_okay=False, dir_okay=True), ) - group.add_argument( + @click.option( "--dbt_manifest_path", - help="Path to dbt manifest.json (typically located in the /target/ directory of the dbt project directory). Cannot be specified with --dbt_path", + envvar="DBT_MANIFEST_PATH", + show_envvar=True, + help="Path to dbt manifest.json file (typically located in the /target/ directory of the dbt project)", + type=click.Path(exists=True, file_okay=True, dir_okay=False), ) - parser_dbt.add_argument( + @click.option( "--dbt_schema", help="Target schema. Should be passed if using folder parser", + type=click.STRING, ) - parser_dbt.add_argument( + @click.option( "--dbt_schema_excludes", - nargs="*", - default=[], - help="Target schemas to exclude. Ignored in folder parser", + metavar="SCHEMAS", + cls=MultiArg, + type=list, + help="Target schemas to exclude. Ignored in folder parser. 
Accepts multiple arguments after the flag", ) - parser_dbt.add_argument( + @click.option( "--dbt_includes", metavar="MODELS", - nargs="*", - default=[], + cls=MultiArg, + type=list, help="Model names to limit processing to", ) - parser_dbt.add_argument( + @click.option( "--dbt_excludes", metavar="MODELS", - nargs="*", - default=[], + cls=MultiArg, + type=list, help="Model names to exclude", ) - - # Metabase arguments - parser_metabase.add_argument( + @click.option( "--metabase_database", - metavar="DB", + envvar="MB_DATABASE", + show_envvar=True, required=True, + cls=OptionAcceptableFromConfig, + type=click.STRING, help="Target database name as set in Metabase (typically aliased)", ) - parser_metabase.add_argument( - "--metabase_host", metavar="HOST", required=True, help="Metabase hostname" - ) - parser_metabase.add_argument( - "--metabase_user", metavar="USER", required=True, help="Metabase username" - ) - parser_metabase.add_argument( - "--metabase_password", metavar="PASS", required=True, help="Metabase password" + @click.option( + "--metabase_host", + metavar="HOST", + envvar="MB_HOST", + show_envvar=True, + required=True, + cls=OptionAcceptableFromConfig, + type=click.STRING, + help="Metabase hostname", + ) + @click.option( + "--metabase_user", + metavar="USER", + envvar="MB_USER", + show_envvar=True, + required=True, + cls=OptionAcceptableFromConfig, + type=click.STRING, + help="Metabase username", + ) + @click.option( + "--metabase_password", + metavar="PASS", + envvar="MB_PASSWORD", + show_envvar=True, + required=True, + cls=OptionAcceptableFromConfig, + type=click.STRING, + help="Metabase password", ) - parser_metabase.add_argument( - "--metabase_use_http", - action="store_true", - help="use HTTP to connect to Metabase instead of HTTPS", + @click.option( + "--metabase_http/--metabase_https", + "metabase_use_http", + default=False, + help="use HTTP or HTTPS to connect to Metabase. 
Default HTTPS", ) - parser_metabase.add_argument( + @click.option( "--metabase_verify", metavar="CERT", + type=click.Path(exists=True, file_okay=True, dir_okay=False), help="Path to certificate bundle used by Metabase client", ) - parser_metabase.add_argument( - "--metabase_sync_skip", - action="store_true", - help="Skip synchronizing Metabase database before export", + @click.option( + "--metabase_sync/--metabase_sync_skip", + "metabase_sync", + default=True, + help="Attempt to synchronize Metabase schema with local models. Default sync", ) - parser_metabase.add_argument( + @click.option( "--metabase_sync_timeout", metavar="SECS", type=int, - help="Synchronization timeout (in secs). If set, we will fail hard on synchronization failure; if not set, we will proceed after attempting sync regardless of success", + help="Synchronization timeout (in secs). If set, we will fail hard on synchronization failure; if not set, we will proceed after attempting sync regardless of success. Only valid if sync is enabled", ) + @functools.wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + + return wrapper + - # Models specific args - parser_models.add_argument( - "--dbt_docs_url", - metavar="URL", - help="Pass in URL to dbt docs site. Appends dbt docs URL for each model to Metabase table description (default None)", +@click.group() +@click.version_option(__version__) +def cli(): + """Model synchronization from dbt to Metabase.""" + ... 
+ + +@click.command(cls=CommandController) +def check_config(): + package_logger.logger().info( + "Looking for configuration file in ~/.dbt-metabase :magnifying_glass_tilted_right:" ) - parser_models.add_argument( - "--dbt_include_tags", - action="store_true", - default=False, - help="Append tags to Table descriptions in Metabase (default False)", + package_logger.logger().info( + "...bootstrapping environmental variables :racing_car:" ) + any_found = False + for env in ENV_VARS: + if env in os.environ: + package_logger.logger().info("Injecting valid env var: %s", env) + param = env.lower().replace("mb_", "metabase_") + CONFIG[param] = os.environ[env] + any_found = True + if not any_found: + package_logger.logger().info("NO valid env vars found") - # Exposures specific args - parser_exposures.add_argument( - "--output_path", - default="./", - help="Path where generated YAML will be outputted (default local dir)", + if not CONFIG: + package_logger.logger().info( + "No configuration file or env vars found, run the `config` command to interactively generate one.", + ) + else: + package_logger.logger().info("Config rendered!") + package_logger.logger().info( + {k: (v if "pass" not in k else "****") for k, v in CONFIG.items()} + ) + + +@click.command(cls=CommandController) +def check_env(): + package_logger.logger().info("All valid env vars: %s", ENV_VARS) + any_found = False + for env in ENV_VARS: + if env in os.environ: + val = os.environ[env] if "pass" not in env.lower() else "****" + package_logger.logger().info("Found value for %s --> %s", env, val) + any_found = True + if not any_found: + package_logger.logger().info("None of the env vars found in environment") + + +@cli.command(cls=CommandController) +@click.option( + "--inspect", + is_flag=True, + help="Introspect your dbt-metabase config.", +) +@click.option( + "--resolve", + is_flag=True, + help="Introspect your dbt-metabase config automatically injecting env vars into the configuration overwriting config.yml 
defaults. Use this flag if you are using env vars and want to see the resolved runtime output.", +) +@click.option( + "--env", + is_flag=True, + help="List all valid env vars for dbt-metabase. Env vars are evaluated before the config.yml and thus take precendence if used.", +) +@click.pass_context +def config(ctx, inspect: bool = False, resolve: bool = False, env: bool = False): + """Interactively generate a config or validate an existing config.yml + + A config allows you to omit arguments which will be substituted with config defaults. This simplifies + the execution of dbt-metabase to simply calling a command in most cases. Ex `dbt-metabase models` + + Execute the `config` command with no flags to enter an interactive session to create or update a config.yml. + + The config.yml should be located in ~/.dbt-metabase/ + Valid keys include any parameter seen in a dbt-metabase --help function + Example: `dbt-metabase models --help` + """ + if inspect: + package_logger.logger().info( + {k: (v if "pass" not in k else "****") for k, v in CONFIG.items()} + ) + if resolve: + ctx.invoke(check_config) + if env: + ctx.invoke(check_env) + if inspect or resolve or env: + ctx.exit() + click.confirm( + "Confirming you want to build or modify a dbt-metabase config file?", abort=True + ) + package_logger.logger().info( + "Preparing interactive configuration :rocket: (note defaults denoted by [...] 
are pulled from your existing config if found)" + ) + config_path = Path.home() / ".dbt-metabase" + config_path.mkdir(parents=True, exist_ok=True) + config_file = {} + conf_name = None + if (config_path / "config.yml").exists(): + with open(config_path / "config.yml", "r", encoding="utf-8") as f: + config_file = yaml.safe_load(f).get("config", {}) + conf_name = "config.yml" + elif (config_path / "config.yaml").exists(): + with open(config_path / "config.yaml", "r", encoding="utf-8") as f: + config_file = yaml.safe_load(f).get("config", {}) + conf_name = "config.yaml" + else: + # Default config name + conf_name = "config.yml" + if not config_file: + package_logger.logger().info("Building config file! :hammer:") + else: + package_logger.logger().info("Modifying config file! :wrench:") + config_file["dbt_database"] = click.prompt( + "Please enter the name of your dbt Database", + default=config_file.get("dbt_database"), + show_default=True, + type=click.STRING, + ) + config_file["dbt_manifest_path"] = click.prompt( + "Please enter the path to your dbt manifest.json \ntypically located in the /target directory of the dbt project", + default=config_file.get("dbt_manifest_path"), + show_default=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True), + ) + if click.confirm( + "Would you like to set some default schemas to exclude when no flags are provided?" + ): + config_file["dbt_schema_excludes"] = click.prompt( + "Target schemas to exclude separated by commas", + default=config_file.get("dbt_schema_excludes"), + show_default=True, + value_proc=lambda s: list(map(str.strip, s.split(","))), + type=click.UNPROCESSED, + ) + else: + config_file.pop("dbt_schema_excludes", None) + if click.confirm( + "Would you like to set some default dbt models to exclude when no flags are provided?" 
+ ): + config_file["dbt_excludes"] = click.prompt( + "dbt model names to exclude separated by commas", + default=config_file.get("dbt_excludes"), + show_default=True, + value_proc=lambda s: list(map(str.strip, s.split(","))), + type=ListParam(), + ) + else: + config_file.pop("dbt_excludes", None) + config_file["metabase_database"] = click.prompt( + "Target database name as set in Metabase (typically aliased)", + default=config_file.get("metabase_database"), + show_default=True, + type=click.STRING, + ) + config_file["metabase_host"] = click.prompt( + "Metabase hostname, this is the URL without the protocol (HTTP/S)", + default=config_file.get("metabase_host"), + show_default=True, + type=click.STRING, + ) + config_file["metabase_user"] = click.prompt( + "Metabase username", + default=config_file.get("metabase_user"), + show_default=True, + type=click.STRING, + ) + config_file["metabase_password"] = click.prompt( + "Metabase password [hidden]", + default=config_file.get("metabase_password"), + hide_input=True, + show_default=False, + type=click.STRING, + ) + config_file["metabase_use_http"] = click.confirm( + "Use HTTP instead of HTTPS to connect to Metabase, unless testing locally we should be saying no here", + default=config_file.get("metabase_use_http", False), + show_default=True, + ) + if click.confirm("Would you like to set a custom certificate bundle to use?"): + config_file["metabase_verify"] = click.prompt( + "Path to certificate bundle used by Metabase client", + default=config_file.get("metabase_verify"), + show_default=True, + type=click.Path( + exists=True, file_okay=True, dir_okay=False, resolve_path=True + ), + ) + else: + config_file.pop("metabase_verify", None) + config_file["metabase_sync"] = click.confirm( + "Would you like to allow Metabase schema syncs by default? 
Best to say yes as there is little downside", + default=config_file.get("metabase_sync", True), + show_default=True, + ) + if config_file["metabase_sync"]: + config_file["metabase_sync_timeout"] = click.prompt( + "Synchronization timeout in seconds. If set, we will fail hard on synchronization failure; \nIf set to 0 or a negative number, we will proceed after attempting sync regardless of success", + default=config_file.get("metabase_sync_timeout", -1), + show_default=True, + value_proc=lambda i: None if int(i) <= 0 else int(i), + type=click.INT, + ) + else: + config_file.pop("metabase_sync_timeout", None) + output_config = {"config": config_file} + package_logger.logger().info( + "Config constructed -- writing config to ~/.dbt-metabase" + ) + package_logger.logger().info( + {k: (v if "pass" not in k else "****") for k, v in config_file.items()} + ) + with open(config_path / conf_name, "w", encoding="utf-8") as outfile: + yaml.dump( + output_config, + outfile, + default_flow_style=False, + allow_unicode=True, + sort_keys=False, + ) + + +@cli.command(cls=CommandController) +@shared_opts +@click.option( + "--dbt_docs_url", + metavar="URL", + type=click.STRING, + help="Pass in URL to dbt docs site. 
Appends dbt docs URL for each model to Metabase table description (default None)", +) +@click.option( + "--dbt_include_tags", + is_flag=True, + help="Flag to append tags to table descriptions in Metabase (default False)", +) +@click.option( + "-v", + "--verbose", + is_flag=True, + help="Flag which signals verbose output", +) +def models( + metabase_host: str, + metabase_user: str, + metabase_password: str, + metabase_database: str, + dbt_database: str, + dbt_path: Optional[str] = None, + dbt_manifest_path: Optional[str] = None, + dbt_schema: Optional[str] = None, + dbt_schema_excludes: Optional[Iterable] = None, + dbt_includes: Optional[Iterable] = None, + dbt_excludes: Optional[Iterable] = None, + metabase_use_http: bool = False, + metabase_verify: Optional[str] = None, + metabase_sync: bool = True, + metabase_sync_timeout: Optional[int] = None, + dbt_include_tags: bool = True, + dbt_docs_url: Optional[str] = None, + verbose: bool = False, +) -> None: + """Exports model documentation and semantic types from dbt to Metabase. + + \f + Args: + metabase_host (str): Metabase hostname + metabase_user (str): Metabase username + metabase_password (str): Metabase password + metabase_database (str): Target database name as set in Metabase (typically aliased) + dbt_database (str): Target database name as specified in dbt models to be actioned + dbt_path (Optional[str], optional): Path to dbt project. If specified with dbt_manifest_path, then the manifest is prioritized. Defaults to None. + dbt_manifest_path (Optional[str], optional): Path to dbt manifest.json file (typically located in the /target/ directory of the dbt project). Defaults to None. + dbt_schema (Optional[str], optional): Target schema. Should be passed if using folder parser. Defaults to None. + dbt_schema_excludes (Optional[Iterable], optional): Target schemas to exclude. Ignored in folder parser. Defaults to None. + dbt_includes (Optional[Iterable], optional): Model names to limit processing to. 
Defaults to None. + dbt_excludes (Optional[Iterable], optional): Model names to exclude. Defaults to None. + metabase_use_http (bool, optional): Use HTTP to connect to Metabase. Defaults to False. + metabase_verify (Optional[str], optional): Path to custom certificate bundle to be used by Metabase client. Defaults to None. + metabase_sync (bool, optional): Attempt to synchronize Metabase schema with local models. Defaults to True. + metabase_sync_timeout (Optional[int], optional): Synchronization timeout (in secs). If set, we will fail hard on synchronization failure; if not set, we will proceed after attempting sync regardless of success. Only valid if sync is enabled. Defaults to None. + dbt_include_tags (bool, optional): Flag to append tags to table descriptions in Metabase. Defaults to True. + dbt_docs_url (Optional[str], optional): Pass in URL to dbt docs site. Appends dbt docs URL for each model to Metabase table description. Defaults to None. + verbose (bool, optional): Flag which signals verbose output. Defaults to False. 
+ """ + + # Set global logging level if verbose + if verbose: + package_logger.LOGGING_LEVEL = logging.DEBUG + + # Instantiate dbt interface + dbt = DbtInterface( + path=dbt_path, + manifest_path=dbt_manifest_path, + database=dbt_database, + schema=dbt_schema, + schema_excludes=dbt_schema_excludes, + includes=dbt_includes, + excludes=dbt_excludes, ) - parser_exposures.add_argument( - "--output_name", - default="metabase_exposures", - help="Used in Exposure extractor, name of generated YAML file (default metabase_exposures)", + + # Load models + dbt_models, aliases = dbt.read_models( + include_tags=dbt_include_tags, + docs_url=dbt_docs_url, ) - parser_exposures.add_argument( - "--include_personal_collections", - action="store_true", - default=False, - help="Include personal collections in exposure extraction (default False)", + + # Instantiate Metabase interface + metabase = MetabaseInterface( + host=metabase_host, + user=metabase_user, + password=metabase_password, + use_http=metabase_use_http, + verify=metabase_verify, + database=metabase_database, + sync=metabase_sync, + sync_timeout=metabase_sync_timeout, ) - parser_exposures.add_argument( - "--collection_excludes", - nargs="*", - default=[], - help="Exclude a list of collections from exposure parsing (default [])", + + # Load client + metabase.prepare_metabase_client(dbt_models) + + # Execute model export + metabase.client.export_models( + database=metabase.database, + models=dbt_models, + aliases=aliases, + ) + + +@cli.command(cls=CommandController) +@shared_opts +@click.option( + "--output_path", + type=click.Path(exists=True, file_okay=False, dir_okay=True, writable=True), + help="Output path for generated exposure yaml. Defaults to local dir.", + default=".", +) +@click.option( + "--output_name", + type=click.STRING, + help="Output name for generated exposure yaml. 
Defaults to metabase_exposures.yml", +) +@click.option( + "--include_personal_collections", + is_flag=True, + help="Flag to include Personal Collections during exposure parsing", +) +@click.option( + "--collection_excludes", + cls=MultiArg, + type=list, + help="Metabase collection names to exclude", +) +@click.option( + "-v", + "--verbose", + is_flag=True, + help="Flag which signals verbose output", +) +def exposures( + metabase_host: str, + metabase_user: str, + metabase_password: str, + metabase_database: str, + dbt_database: str, + dbt_path: Optional[str] = None, + dbt_manifest_path: Optional[str] = None, + dbt_schema: Optional[str] = None, + dbt_schema_excludes: Optional[Iterable] = None, + dbt_includes: Optional[Iterable] = None, + dbt_excludes: Optional[Iterable] = None, + metabase_use_http: bool = False, + metabase_verify: Optional[str] = None, + metabase_sync: bool = True, + metabase_sync_timeout: Optional[int] = None, + output_path: str = ".", + output_name: str = "metabase_exposures.yml", + include_personal_collections: bool = False, + collection_excludes: Optional[Iterable] = None, + verbose: bool = False, +) -> None: + """Extracts and imports exposures from Metabase to dbt. + + \f + Args: + metabase_host (str): Metabase hostname + metabase_user (str): Metabase username + metabase_password (str): Metabase password + metabase_database (str): Target database name as set in Metabase (typically aliased) + dbt_database (str): Target database name as specified in dbt models to be actioned + dbt_path (Optional[str], optional): Path to dbt project. If specified with dbt_manifest_path, then the manifest is prioritized. Defaults to None. + dbt_manifest_path (Optional[str], optional): Path to dbt manifest.json file (typically located in the /target/ directory of the dbt project). Defaults to None. + dbt_schema (Optional[str], optional): Target schema. Should be passed if using folder parser. Defaults to None. 
+ dbt_schema_excludes (Optional[Iterable], optional): Target schemas to exclude. Ignored in folder parser. Defaults to None. + dbt_includes (Optional[Iterable], optional): Model names to limit processing to. Defaults to None. + dbt_excludes (Optional[Iterable], optional): Model names to exclude. Defaults to None. + metabase_use_http (bool, optional): Use HTTP to connect to Metabase. Defaults to False. + metabase_verify (Optional[str], optional): Path to custom certificate bundle to be used by Metabase client. Defaults to None. + metabase_sync (bool, optional): Attempt to synchronize Metabase schema with local models. Defaults to True. + metabase_sync_timeout (Optional[int], optional): Synchronization timeout (in secs). If set, we will fail hard on synchronization failure; if not set, we will proceed after attempting sync regardless of success. Only valid if sync is enabled. Defaults to None. + output_path (str): Output path for generated exposure yaml. Defaults to "." local dir. + output_name (str): Output name for generated exposure yaml. Defaults to metabase_exposures.yml. + include_personal_collections (bool, optional): Flag to include Personal Collections during exposure parsing. Defaults to False. + collection_excludes (Iterable, optional): Collection names to exclude. Defaults to None. + verbose (bool, optional): Flag which signals verbose output. Defaults to False. 
+ """ + + # Set global logging level if verbose + if verbose: + package_logger.LOGGING_LEVEL = logging.DEBUG + + # Instantiate dbt interface + dbt = DbtInterface( + path=dbt_path, + manifest_path=dbt_manifest_path, + database=dbt_database, + schema=dbt_schema, + schema_excludes=dbt_schema_excludes, + includes=dbt_includes, + excludes=dbt_excludes, + ) + + # Load models + dbt_models, aliases = dbt.read_models() + del aliases # Unused in this particular function + + # Instantiate Metabase interface + metabase = MetabaseInterface( + host=metabase_host, + user=metabase_user, + password=metabase_password, + use_http=metabase_use_http, + verify=metabase_verify, + database=metabase_database, + sync=metabase_sync, + sync_timeout=metabase_sync_timeout, + ) + + # Load client + metabase.prepare_metabase_client(dbt_models) + + # Execute exposure extraction + metabase.client.extract_exposures( + models=dbt_models, + output_path=output_path, + output_name=output_name, + include_personal_collections=include_personal_collections, + collection_excludes=collection_excludes, ) - # Common/misc arguments - parser.add_argument( - "--verbose", - action="store_true", - default=False, - help="Verbose output", - ) - - parsed = parser.parse_args(args=args) - - if parsed.verbose: - logger = logging.getLogger() - logger.addHandler(logging.StreamHandler(sys.stdout)) - logger.setLevel(logging.DEBUG) - - # These args drive loading the Metabase client and dbt models and are prerequisites to any functionality of dbt-metabase - metabase_config = MetabaseConfig( - host=parsed.metabase_host, - user=parsed.metabase_user, - password=parsed.metabase_password, - use_http=parsed.metabase_use_http, - verify=parsed.metabase_verify, - database=parsed.metabase_database, - sync_skip=parsed.metabase_sync_skip, - sync_timeout=parsed.metabase_sync_timeout, - ) - dbt_config = DbtConfig( - path=parsed.dbt_path, - manifest_path=parsed.dbt_manifest_path, - database=parsed.dbt_database, - schema=parsed.dbt_schema, - 
schema_excludes=parsed.dbt_schema_excludes, - includes=parsed.dbt_includes, - excludes=parsed.dbt_excludes, - ) - - if parsed.command == "models": - models( - metabase_config, - dbt_config, - dbt_docs_url=parsed.dbt_docs_url, - dbt_include_tags=parsed.dbt_include_tags, - ) - elif parsed.command == "exposures": - exposures( - metabase_config, - dbt_config, - output_path=parsed.output_path, - output_name=parsed.output_name, - include_personal_collections=parsed.include_personal_collections, - collection_excludes=parsed.collection_excludes, - ) - else: - logging.error("Invalid command. Must be one of either 'models' or 'exposures'.") + +def main(): + # Valid kwarg + cli(max_content_width=600) # pylint: disable=unexpected-keyword-arg diff --git a/dbtmetabase/bin/dbt-metabase b/dbtmetabase/bin/dbt-metabase index 28bdccc9..24a863da 100755 --- a/dbtmetabase/bin/dbt-metabase +++ b/dbtmetabase/bin/dbt-metabase @@ -1,6 +1,5 @@ #!/usr/bin/env python -import sys import dbtmetabase if __name__ == "__main__": - dbtmetabase.main(sys.argv[1:]) + dbtmetabase.main() diff --git a/dbtmetabase/logger/__init__.py b/dbtmetabase/logger/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dbtmetabase/logger/logging.py b/dbtmetabase/logger/logging.py new file mode 100644 index 00000000..aee27dc0 --- /dev/null +++ b/dbtmetabase/logger/logging.py @@ -0,0 +1,76 @@ +import logging +from logging.handlers import RotatingFileHandler +from functools import lru_cache +from pathlib import Path +from typing import Optional, Union + +from rich.logging import RichHandler + + +# Log File Format +LOG_FILE_FORMAT = "%(asctime)s — %(name)s — %(levelname)s — %(message)s" + +# Log File Path +LOG_PATH = Path.home().absolute() / ".dbt-metabase" / "logs" + +# Console Output Level +LOGGING_LEVEL = logging.INFO + + +def rotating_log_handler( + name: str, + path: Path, + formatter: str, +) -> RotatingFileHandler: + """This handler writes warning and higher level outputs to logs in a home 
.dbt-metabase directory rotating them as needed""" + path.mkdir(parents=True, exist_ok=True) + handler = RotatingFileHandler( + str(path / "{log_name}.log".format(log_name=name)), + maxBytes=int(1e6), + backupCount=3, + ) + handler.setFormatter(logging.Formatter(formatter)) + handler.setLevel(logging.WARNING) + return handler + + +@lru_cache(maxsize=10) +def logger( + name: str = "dbtmetabase", + level: Optional[Union[int, str]] = None, + path: Optional[Path] = None, + formatter: Optional[str] = None, +) -> logging.Logger: + """Builds and caches loggers. Can be configured with module level attributes or on a call by call basis. + Simplifies logger management without having to instantiate separate pointers in each module. + + Args: + name (str, optional): Logger name, also used for output log file name in `~/.dbt-metabase/logs` directory. + level (Union[int, str], optional): Logging level, this is explicitly passed to console handler which affects what level of log messages make it to the console. Defaults to logging.INFO. + path (Path, optional): Path for output warning level+ log files. Defaults to `~/.dbt-metabase/logs` + formatter (str, optional): Format for output log files. Defaults to a "time — name — level — message" format + + Returns: + logging.Logger: Prepared logger with rotating logs and console streaming. Can be executed directly from function.
+ """ + if isinstance(level, str): + level = getattr(logging, level, logging.INFO) + if level is None: + level = LOGGING_LEVEL + if path is None: + path = LOG_PATH + if formatter is None: + formatter = LOG_FILE_FORMAT + _logger = logging.getLogger(name) + _logger.setLevel(level) + _logger.addHandler(rotating_log_handler(name, path, formatter)) + _logger.addHandler( + RichHandler( + level=level, + rich_tracebacks=True, + markup=True, + show_time=False, + ) + ) + _logger.propagate = False + return _logger diff --git a/dbtmetabase/metabase.py b/dbtmetabase/metabase.py index f947425b..952170f2 100644 --- a/dbtmetabase/metabase.py +++ b/dbtmetabase/metabase.py @@ -1,5 +1,10 @@ +import re import json -import logging +import requests +import time +import yaml +import os + from typing import ( Sequence, Optional, @@ -11,15 +16,9 @@ Mapping, ) -import requests -import time - +from .logger.logging import logger from .models.metabase import MetabaseModel, MetabaseColumn -import re -import yaml -import os - class MetabaseClient: """Metabase API client.""" @@ -59,7 +58,7 @@ def __init__( self.cte_parser = re.compile( r"[Ww][Ii][Tt][Hh]\s+\b(\w+)\b\s+as|[)]\s*[,]\s*\b(\w+)\b\s+as" ) - logging.info("Session established successfully") + logger().info(":ok_hand: Session established successfully") def get_session_id(self, user: str, password: str) -> str: """Obtains new session ID from API. 
@@ -101,7 +100,7 @@ def sync_and_wait( timeout = 30 if timeout < self._SYNC_PERIOD_SECS: - logging.critical( + logger().critical( "Timeout provided %d secs, must be at least %d", timeout, self._SYNC_PERIOD_SECS, @@ -110,7 +109,7 @@ def sync_and_wait( database_id = self.find_database_id(database) if not database_id: - logging.critical("Cannot find database by name %s", database) + logger().critical("Cannot find database by name %s", database) return False self.api("post", f"/api/database/{database_id}/sync_schema") @@ -148,7 +147,7 @@ def models_compatible(self, database_id: str, models: Sequence) -> bool: lookup_key = f"{schema_name}.{model_name}" if lookup_key not in field_lookup: - logging.warning( + logger().warning( "Model %s not found in %s schema", lookup_key, schema_name ) are_models_compatible = False @@ -157,7 +156,7 @@ def models_compatible(self, database_id: str, models: Sequence) -> bool: for column in model.columns: column_name = column.name.upper() if column_name not in table_lookup: - logging.warning( + logger().warning( "Column %s not found in %s model", column_name, lookup_key ) are_models_compatible = False @@ -180,7 +179,7 @@ def export_models( database_id = self.find_database_id(database) if not database_id: - logging.critical("Cannot find database by name %s", database) + logger().critical("Cannot find database by name %s", database) return table_lookup, field_lookup = self.build_metadata_lookups(database_id) @@ -211,7 +210,9 @@ def export_model( api_table = table_lookup.get(lookup_key) if not api_table: - logging.error("Table %s does not exist in Metabase", lookup_key) + logger().error( + "\n:cross_mark: Table %s does not exist in Metabase", lookup_key + ) return # Empty strings not accepted by Metabase @@ -228,11 +229,13 @@ def export_model( f"/api/table/{table_id}", json={"description": model_description}, ) - logging.info("Updated table %s successfully", lookup_key) + logger().info("\n:raising_hands: Updated table %s successfully", 
lookup_key) elif not model_description: - logging.info("No model description provided for table %s", lookup_key) + logger().info( + "\n:bow: No model description provided for table %s", lookup_key + ) else: - logging.info("Table %s is up-to-date", lookup_key) + logger().info("\n:thumbs_up: Table %s is up-to-date", lookup_key) for column in model.columns: self.export_column(schema_name, model_name, column, field_lookup, aliases) @@ -259,7 +262,7 @@ def export_column( field = field_lookup.get(table_lookup_key, {}).get(column_name) if not field: - logging.error( + logger().error( "Field %s.%s does not exist in Metabase", table_lookup_key, column_name ) return @@ -289,15 +292,13 @@ def export_column( ) if not target_table or not target_field: - logging.info( - "Passing on fk resolution for %s. Target field %s was not resolved during dbt model parsing.", + logger().info( + ":bow: Passing on fk resolution for %s. Target field %s was not resolved during dbt model parsing.", table_lookup_key, target_field, ) else: - # Now we can trust our parse_ref even if it is pointing to something like source("salesforce", "my_cool_table_alias") - # just as easily as a simple ref("stg_salesforce_cool_table") -> the dict is empty if parsing from manifest.json was_aliased = ( aliases.get(target_table.split(".", 1)[-1]) if target_table @@ -308,16 +309,19 @@ def export_column( [target_table.split(".", 1)[0], was_aliased] ) - logging.info( - "Looking for field %s in table %s", target_field, target_table + logger().debug( + ":magnifying_glass_tilted_right: Looking for field %s in table %s", + target_field, + target_table, ) + fk_target_field_id = ( field_lookup.get(target_table, {}).get(target_field, {}).get("id") ) if fk_target_field_id: - logging.info( - "Setting target field %s to PK in order to facilitate FK ref for %s column", + logger().info( + ":key: Setting target field %s to PK in order to facilitate FK ref for %s column", fk_target_field_id, column_name, ) @@ -327,8 +331,8 @@ def 
export_column( json={semantic_type: "type/PK"}, ) else: - logging.error( - "Unable to find foreign key target %s.%s", + logger().error( + ":cross_mark: Unable to find foreign key target %s.%s", target_table, target_field, ) @@ -343,6 +347,10 @@ def export_column( else: column_description = column.description + # Preserve this relationship by default + if api_field["fk_target_field_id"] is not None and fk_target_field_id is None: + fk_target_field_id = api_field["fk_target_field_id"] + if ( api_field["description"] != column_description or api_field[semantic_type] != column.semantic_type @@ -360,9 +368,13 @@ def export_column( "fk_target_field_id": fk_target_field_id, }, ) - logging.info("Updated field %s.%s successfully", model_name, column_name) + logger().info( + ":sparkles: Updated field %s.%s successfully", model_name, column_name + ) else: - logging.info("Field %s.%s is up-to-date", model_name, column_name) + logger().info( + ":thumbs_up: Field %s.%s is up-to-date", model_name, column_name + ) def find_database_id(self, name: str) -> Optional[str]: """Finds Metabase database ID by name. @@ -414,7 +426,7 @@ def build_metadata_lookups( } if table_schema in schemas_to_exclude: - logging.debug( + logger().debug( "Ignoring Metabase table %s in schema %s. 
It belongs to excluded schemas %s", table_name, table_schema, @@ -467,7 +479,7 @@ def increase_indent(self, flow=False, indentless=False): if collection_excludes is None: collection_excludes = [] - refable_models = {node.name: node.ref for node in models} + refable_models = {node.name.upper(): node.ref for node in models} self.collections = self.api("get", "/api/collection") self.tables = self.api("get", "/api/table") @@ -487,7 +499,7 @@ def increase_indent(self, flow=False, indentless=False): continue # Iter through collection - logging.info("Exploring collection %s", collection["name"]) + logger().info("\n\n:sparkles: Exploring collection %s", collection["name"]) for item in self.api("get", f"/api/collection/{collection['id']}/items"): # Ensure collection item is of parsable type @@ -503,7 +515,10 @@ def increase_indent(self, flow=False, indentless=False): exposure = self.api("get", f"/api/{exposure_type}/{exposure_id}") exposure_name = exposure.get("name", "Exposure [Unresolved Name]") - logging.info("Introspecting exposure: %s", exposure_name) + logger().info( + "\n:bow_and_arrow: Introspecting exposure: %s", + exposure_name, + ) # Process exposure if exposure_type == "card": @@ -514,7 +529,7 @@ def increase_indent(self, flow=False, indentless=False): ) # Parse Metabase question - self._extract_card_exposures(exposure_id, exposure) + self._extract_card_exposures(exposure_id, exposure, refable_models) native_query = self.native_query elif exposure_type == "dashboard": @@ -534,7 +549,13 @@ def increase_indent(self, flow=False, indentless=False): if "id" not in dashboard_item_reference: continue # Parse Metabase question - self._extract_card_exposures(dashboard_item_reference["id"]) + self._extract_card_exposures( + dashboard_item_reference["id"], + refable_models=refable_models, + ) + + if not self.models_exposed: + logger().info(":bow: No models mapped to exposure") # Extract creator info if "creator" in exposure: @@ -588,7 +609,12 @@ def increase_indent(self, 
flow=False, indentless=False): # Return object return {"version": _RESOURCE_VERSION, "exposures": parsed_exposures} - def _extract_card_exposures(self, card_id: int, exposure: Optional[Mapping] = None): + def _extract_card_exposures( + self, + card_id: int, + exposure: Optional[Mapping] = None, + refable_models: Optional[MutableMapping] = None, + ): """Extracts exposures from Metabase questions populating `self.models_exposed` Arguments: @@ -601,6 +627,9 @@ def _extract_card_exposures(self, card_id: int, exposure: Optional[Mapping] = No None -- self.models_exposed is populated through this method. """ + if refable_models is None: + refable_models = {} + # If an exposure is not passed, pull from id if not exposure: exposure = self.api("get", f"/api/card/{card_id}") @@ -615,13 +644,15 @@ def _extract_card_exposures(self, card_id: int, exposure: Optional[Mapping] = No if str(source_table_id).startswith("card__"): # Handle questions based on other question in virtual db - self._extract_card_exposures(int(source_table_id.split("__")[-1])) + self._extract_card_exposures( + int(source_table_id.split("__")[-1]), refable_models=refable_models + ) else: # Normal question source_table = self.table_map.get(source_table_id) if source_table: - logging.info( - "Model extracted from Metabase question: %s", + logger().info( + ":direct_hit: Model extracted from Metabase question: %s", source_table, ) self.models_exposed.append(source_table) @@ -632,15 +663,16 @@ def _extract_card_exposures(self, card_id: int, exposure: Optional[Mapping] = No # Handle questions based on other question in virtual db if str(query_join.get("source-table", "")).startswith("card__"): self._extract_card_exposures( - int(query_join.get("source-table").split("__")[-1]) + int(query_join.get("source-table").split("__")[-1]), + refable_models=refable_models, ) continue # Joined model parsed joined_table = self.table_map.get(query_join.get("source-table")) if joined_table: - logging.info( - "Model extracted from 
Metabase question join: %s", + logger().info( + ":direct_hit: Model extracted from Metabase question join: %s", joined_table, ) self.models_exposed.append(joined_table) @@ -660,13 +692,16 @@ def _extract_card_exposures(self, card_id: int, exposure: Optional[Mapping] = No # Grab just the table / model name clean_exposure = sql_ref.split(".")[-1].strip('"') - # Scrub CTEs for cleanliness sake + # Scrub CTEs if clean_exposure in ctes: continue + # Verify this is one of our parsed refable models so exposures dont break the DAG + if not refable_models.get(clean_exposure): + continue if clean_exposure: - logging.info( - "Model extracted from native query: %s", + logger().info( + ":direct_hit: Model extracted from native query: %s", clean_exposure, ) self.models_exposed.append(clean_exposure) @@ -794,9 +829,9 @@ def api( response.raise_for_status() except requests.exceptions.HTTPError: if "password" in kwargs["json"]: - logging.error("HTTP request failed. Response: %s", response.text) + logger().error("HTTP request failed. Response: %s", response.text) else: - logging.error( + logger().error( "HTTP request failed. Payload: %s. 
Response: %s", kwargs["json"], response.text, diff --git a/dbtmetabase/models/config.py b/dbtmetabase/models/config.py index 13209dfa..83463f42 100644 --- a/dbtmetabase/models/config.py +++ b/dbtmetabase/models/config.py @@ -1,30 +1,64 @@ -from dataclasses import dataclass, field from typing import Optional, Iterable, Union -@dataclass class MetabaseConfig: - # Metabase Client - database: str - host: str - user: str - password: str - # Metabase additional connection opts - use_http: bool = False - verify: Union[str, bool] = True - # Metabase Sync - sync_skip: bool = False - sync_timeout: Optional[int] = None - - -@dataclass + def __init__( + self, + database: str, + host: str, + user: str, + password: str, + use_http: bool = False, + verify: Optional[Union[str, bool]] = True, + sync: bool = True, + sync_timeout: Optional[int] = None, + ): + # Metabase Client + self.database = database + self.host = host + self.user = user + self.password = password + # Metabase additional connection opts + self.use_http = use_http + self.verify = verify + # Metabase Sync + self.sync = sync + self.sync_timeout = sync_timeout + + class DbtConfig: - # dbt Reader - database: str - manifest_path: Optional[str] = None - path: Optional[str] = None - # dbt Target Models - schema: Optional[str] = None - schema_excludes: Iterable = field(default_factory=list) - includes: Iterable = field(default_factory=list) - excludes: Iterable = field(default_factory=list) + def __init__( + self, + database: str, + manifest_path: Optional[str] = None, + path: Optional[str] = None, + schema: Optional[str] = None, + schema_excludes: Optional[Iterable] = None, + includes: Optional[Iterable] = None, + excludes: Optional[Iterable] = None, + ): + + if schema_excludes is None: + schema_excludes = [] + if includes is None: + includes = [] + if excludes is None: + excludes = [] + + # dbt Reader + self.database = database + self.manifest_path = manifest_path + self.path = path + # dbt Target Models + self.schema = 
schema + self._schema_excludes = schema_excludes + self.includes = includes + self.excludes = excludes + + @property + def schema_excludes(self) -> Iterable: + return self._schema_excludes + + @schema_excludes.setter + def schema_excludes(self, value: Iterable) -> None: + self._schema_excludes = list({schema.upper() for schema in value}) diff --git a/dbtmetabase/models/exceptions.py b/dbtmetabase/models/exceptions.py new file mode 100644 index 00000000..2b603791 --- /dev/null +++ b/dbtmetabase/models/exceptions.py @@ -0,0 +1,18 @@ +class NoDbtPathSupplied(Exception): + """Thrown when no argument for dbt path has been supplied""" + + +class NoDbtSchemaSupplied(Exception): + """Thrown when using folder parser without supplying a schema""" + + +class MetabaseClientNotInstantiated(Exception): + """Thrown when trying to access metabase client from interface prior to instantiation via class method""" + + +class MetabaseUnableToSync(Exception): + """Thrown when Metabase cannot sync / align models with dbt model""" + + +class DbtParserNotInstantiated(Exception): + """Thrown when trying to access dbt reader from interface prior to instantiation via class method""" diff --git a/dbtmetabase/models/interface.py b/dbtmetabase/models/interface.py new file mode 100644 index 00000000..54f7202d --- /dev/null +++ b/dbtmetabase/models/interface.py @@ -0,0 +1,134 @@ +import logging +import os.path +from typing import Optional, Union, List, Tuple, MutableMapping + +from .config import MetabaseConfig, DbtConfig +from .metabase import MetabaseModel +from .exceptions import ( + NoDbtPathSupplied, + NoDbtSchemaSupplied, + MetabaseClientNotInstantiated, + DbtParserNotInstantiated, + MetabaseUnableToSync, +) +from ..parsers.dbt_folder import DbtFolderReader +from ..parsers.dbt_manifest import DbtManifestReader +from ..metabase import MetabaseClient + + +class MetabaseInterface(MetabaseConfig): + """Interface for interacting with instantiating a Metabase Config and preparing a client 
object""" + + _client: Optional[MetabaseClient] = None + + @property + def client(self) -> MetabaseClient: + if self._client is None: + raise MetabaseClientNotInstantiated( + "Metabase client is not yet instantiated. Call `prepare_metabase_client` method first" + ) + return self._client + + def prepare_metabase_client(self, dbt_models: Optional[List[MetabaseModel]] = None): + """Prepares the metabase client which can then after be accessed via the `client` property + + Args: + dbt_models (Optional[List[MetabaseModel]]): Used if sync is enabled to verify all dbt models passed exist in Metabase + + Raises: + MetabaseUnableToSync: This error is raised if sync is enabled and a timeout is explicitly set in the `Metabase` object config + """ + if dbt_models is None: + dbt_models = [] + + self._client = MetabaseClient( + host=self.host, + user=self.user, + password=self.password, + use_http=self.use_http, + verify=self.verify, + ) + + # Sync and attempt schema alignment prior to execution; if timeout is not explicitly set, proceed regardless of success + if self.sync: + if self.sync_timeout is not None and not self._client.sync_and_wait( + self.database, + dbt_models, + self.sync_timeout, + ): + logging.critical("Sync timeout reached, models still not compatible") + raise MetabaseUnableToSync( + "Unable to align models between dbt target models and Metabase" + ) + + +class DbtInterface(DbtConfig): + """Interface for interacting with instantiating a Dbt Config and preparing a validated parser object""" + + _parser: Optional[Union[DbtManifestReader, DbtFolderReader]] = None + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.validate_config() + self.prepare_dbt_parser() + + @property + def parser(self) -> Union[DbtManifestReader, DbtFolderReader]: + if self._parser is None: + raise DbtParserNotInstantiated( + "dbt reader is not yet instantiated. 
Call `prepare_dbt_parser` method first" + ) + return self._parser + + def get_config(self) -> DbtConfig: + return DbtConfig( + database=self.database, + manifest_path=self.manifest_path, + path=self.path, + schema=self.schema, + schema_excludes=self.schema_excludes, + includes=self.includes, + excludes=self.excludes, + ) + + def validate_config(self) -> None: + """Validates a dbt config object + + Raises: + NoDbtPathSupplied: If no path for either manifest or project is supplied, this error is raised + NoDbtSchemaSupplied: If no schema is supplied while using the folder parser, this error is raised + """ + # Check 1 Verify Path + if not (self.path or self.manifest_path): + raise NoDbtPathSupplied( + "One of either dbt_path or dbt_manifest_path is required." + ) + # Check 2 Notify User if Both Paths Are Supplied + if self.path and self.manifest_path: + logging.warning( + "Both dbt path and manifest path were supplied. Prioritizing manifest parser" + ) + # Check 3 Validation for Folder Parser + if self.path and not self.schema: + raise NoDbtSchemaSupplied( + "Must supply a schema if using YAML parser, it is used to resolve foreign key relations and which Metabase models to propagate documentation to" + ) + # ... Add checks to interface as needed + + def prepare_dbt_parser(self) -> None: + """Resolve dbt reader being either YAML or manifest.json based which can then after be accessed via the `parser` property""" + if self.manifest_path: + self._parser = DbtManifestReader(os.path.expandvars(self.manifest_path)) + elif self.path: + self._parser = DbtFolderReader(os.path.expandvars(self.path)) + else: + raise NoDbtPathSupplied( + "One of either dbt_path or dbt_manifest_path is required." 
+ ) + + def read_models( + self, + include_tags: bool = True, + docs_url: Optional[str] = None, + ) -> Tuple[List[MetabaseModel], MutableMapping]: + return self.parser.read_models(self, include_tags, docs_url) diff --git a/dbtmetabase/models/metabase.py b/dbtmetabase/models/metabase.py index c2fb3713..68644e38 100644 --- a/dbtmetabase/models/metabase.py +++ b/dbtmetabase/models/metabase.py @@ -8,7 +8,7 @@ METABASE_META_FIELDS = ["special_type", "semantic_type", "visibility_type"] -class ModelKey(str, Enum): +class ModelType(str, Enum): nodes = "nodes" sources = "sources" @@ -16,7 +16,7 @@ class ModelKey(str, Enum): @dataclass class MetabaseColumn: name: str - description: str = "" + description: Optional[str] = None meta_fields: MutableMapping = field(default_factory=dict) @@ -32,7 +32,18 @@ class MetabaseModel: name: str schema: str description: str = "" - model_key: ModelKey = ModelKey.nodes - ref: Optional[str] = None + + model_type: ModelType = ModelType.nodes + dbt_name: Optional[str] = None + source: Optional[str] = None + unique_id: Optional[str] = None + + @property + def ref(self) -> Optional[str]: + if self.model_type == ModelType.nodes: + return f"ref('{self.name}')" + elif self.model_type == ModelType.sources: + return f"source('{self.source}', '{self.name if self.dbt_name is None else self.dbt_name}')" + return None columns: Sequence[MetabaseColumn] = field(default_factory=list) diff --git a/dbtmetabase/parsers/dbt_folder.py b/dbtmetabase/parsers/dbt_folder.py index a034e75d..4739c563 100644 --- a/dbtmetabase/parsers/dbt_folder.py +++ b/dbtmetabase/parsers/dbt_folder.py @@ -1,12 +1,13 @@ -import logging import os import re import yaml from pathlib import Path -from typing import List, Iterable, Mapping, MutableMapping, Optional +from typing import List, Iterable, Mapping, MutableMapping, Optional, Tuple -from ..models.metabase import METABASE_META_FIELDS, ModelKey +from ..models.config import DbtConfig +from ..models.metabase import 
METABASE_META_FIELDS, ModelType from ..models.metabase import MetabaseModel, MetabaseColumn +from ..logger.logging import logger class DbtFolderReader: @@ -22,28 +23,31 @@ def __init__(self, project_path: str): """ self.project_path = os.path.expanduser(project_path) - self.catch_aliases: MutableMapping = {} + self.alias_mapping: MutableMapping = {} def read_models( self, - database: Optional[str] = None, - schema: Optional[str] = None, - schema_excludes: Iterable = None, - includes: Iterable = None, - excludes: Iterable = None, + dbt_config: DbtConfig, include_tags: bool = True, docs_url: Optional[str] = None, - ) -> List[MetabaseModel]: + ) -> Tuple[List[MetabaseModel], MutableMapping]: """Reads dbt models in Metabase-friendly format. Keyword Arguments: - includes {list} -- Model names to limit processing to. (default: {None}) - excludes {list} -- Model names to exclude. (default: {None}) + dbt_config {Dbt} -- Dbt object + include_tags {bool} -- Append dbt model tags to dbt model descriptions. (default: {True}) + docs_url {Optional[str]} -- Append dbt docs url to dbt model description Returns: list -- List of dbt models in Metabase-friendly format. 
""" + database = dbt_config.database + schema = dbt_config.schema + schema_excludes = dbt_config.schema_excludes + includes = dbt_config.includes + excludes = dbt_config.excludes + if schema_excludes is None: schema_excludes = [] if includes is None: @@ -59,101 +63,112 @@ def read_models( mb_models: List[MetabaseModel] = [] for path in (Path(self.project_path) / "models").rglob("*.yml"): - logging.info("Processing model: %s", path) with open(path, "r", encoding="utf-8") as stream: schema_file = yaml.safe_load(stream) if schema_file is None: - logging.warning("Skipping empty or invalid YAML: %s", path) + logger().warning("Skipping empty or invalid YAML: %s", path) continue for model in schema_file.get("models", []): - name = model.get("identifier", model["name"]) - if "identifier" in model: - self.catch_aliases[name] = model["name"] - logging.info("Model: %s", name) + name = model.get("alias", model["name"]) + # Refs will still use file name -- this alias mapping is good for getting the right name in the database + if "alias" in model: + self.alias_mapping[name] = model["name"] + logger().info("\nProcessing model: %s", path) if (not includes or name in includes) and (name not in excludes): mb_models.append( self._read_model( - model, - schema.upper(), + model=model, + schema=schema.upper(), + model_type=ModelType.nodes, include_tags=include_tags, - model_key=ModelKey.nodes, ) ) + logger().debug(mb_models[-1].ref) for source in schema_file.get("sources", []): source_schema_name = source.get("schema", source["name"]) if "{{" in source_schema_name and "}}" in source_schema_name: - logging.warning( + logger().warning( "dbt Folder Reader cannot resolve jinja expressions- use the Manifest Reader instead." 
) + source_schema_name = schema if source_schema_name.upper() != schema.upper(): continue for model in source.get("tables", []): name = model.get("identifier", model["name"]) + # These will be used to resolve our regex parsed source() references if "identifier" in model: - self.catch_aliases[name] = model["name"] - logging.info("Source: %s", name) + self.alias_mapping[name] = model["name"] + logger().info( + "\nProcessing source: %s -- table: %s", path, name + ) if (not includes or name in includes) and ( name not in excludes ): mb_models.append( self._read_model( - model, - source_schema_name.upper(), - include_tags=include_tags, - model_key=ModelKey.sources, + model=model, source=source["name"], + model_type=ModelType.sources, + schema=source_schema_name.upper(), + include_tags=include_tags, ) ) + logger().debug(mb_models[-1].ref) - return mb_models + return mb_models, self.alias_mapping def _read_model( self, model: dict, schema: str, + source: Optional[str] = None, + model_type: ModelType = ModelType.nodes, include_tags: bool = True, - model_key: ModelKey = ModelKey.nodes, - source: str = None, ) -> MetabaseModel: """Reads one dbt model in Metabase-friendly format. Arguments: model {dict} -- One dbt model to read. + schema {str} -- Schema as passed doen from CLI args or parsed from `source` + source {str, optional} -- Name of the source if source + model_type {str} -- The type of the node which can be one of either nodes or sources + include_tags: {bool} -- Flag to append tags to description of model Returns: dict -- One dbt model in Metabase-friendly format. 
""" - mb_columns: List[MetabaseColumn] = [] + metabase_columns: List[MetabaseColumn] = [] for column in model.get("columns", []): - mb_columns.append(self._read_column(column, schema)) + metabase_columns.append(self._read_column(column, schema)) description = model.get("description", "") if include_tags: - tags = model.get("tags") + tags = model.get("tags", []) if tags: tags = ", ".join(tags) if description: description += "\n\n" description += f"Tags: {tags}" - ref: Optional[str] = None - - if model_key == ModelKey.nodes: - ref = f"ref('{model.get('identifier', model['name'])}')" - elif model_key == ModelKey.sources: - ref = f"source('{source}', '{model['name']}')" + # Resolved name is what the name will be in the database + resolved_name = model.get("alias", model.get("identifier")) + dbt_name = None + if not resolved_name: + resolved_name = model["name"] + else: + dbt_name = model["name"] return MetabaseModel( - # We are implicitly complying with aliases by doing this - name=model.get("identifier", model["name"]).upper(), + name=resolved_name, schema=schema, description=description, - columns=mb_columns, - model_key=model_key, - ref=ref, + columns=metabase_columns, + model_type=model_type, + source=source, + dbt_name=dbt_name, ) def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn: @@ -161,14 +176,17 @@ def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn: Arguments: column {dict} -- One dbt column to read. + schema {str} -- Schema as passed doen from CLI args or parsed from `source` Returns: dict -- One dbt column in Metabase-friendly format. 
""" - mb_column = MetabaseColumn( - name=column.get("name", "").upper().strip('"'), - description=column.get("description", ""), + column_name = column.get("name", "").upper().strip('"') + column_description = column.get("description") + metabase_column = MetabaseColumn( + name=column_name, + description=column_description, ) tests: Optional[Iterable] = column.get("tests", []) @@ -179,53 +197,48 @@ def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn: if isinstance(test, dict): if "relationships" in test: relationships = test["relationships"] - mb_column.semantic_type = "type/FK" - # Note: For foreign keys that point to a different schema than the target, the yml meta: metabase.fk_ref must be used - # Otherwise we use target schema which should be fine in 95% of cases - mb_column.fk_target_table = ( - column.get("meta", {}) - .get( - # Prioritize explicitly set FK in YAML file which should have format schema.table unaliased - "metabase.fk_ref", - # If metabase.fk_ref not present in YAML, infer FK relation to table in target schema and parse ref/source - # We will be translating any potentially aliased source() calls later during FK parsing since we do not have all possible aliases yet and thus cannot unalias - self.parse_ref(relationships["to"], schema), - ) - .strip('"') - ) - if not mb_column.fk_target_table: - logging.warning( - "Could not resolve foreign key target for column %s", - mb_column.name, + parsed_table_ref = self.parse_ref(relationships["to"]) + if not parsed_table_ref: + logger().warning( + "Could not resolve foreign key target table for column %s", + metabase_column.name, ) continue - # Lets be lenient and try to infer target schema if it was not provided when specified in metabase.fk_ref - # Because parse_ref guarantees schema.table format, we can assume this was derived through fk_ref - if "." 
not in mb_column.fk_target_table: - logging.warning( - "Target table %s has fk ref declared through metabase.fk_ref missing schema (Format should be schema.table), inferring from target", - mb_column.fk_target_table, - ) - mb_column.fk_target_table = ( - f"{schema}.{mb_column.fk_target_table}" + + parsed_ref = ".".join( + map( + lambda s: s.strip('"'), + column.get("meta", {}) + .get("metabase.foreign_key_target_table", "") + .split("."), ) - mb_column.fk_target_table = mb_column.fk_target_table.upper() - # Account for (example) '"Id"' relationship: to: fields used as a workaround for current tests not quoting consistently - mb_column.fk_target_field = ( - relationships["field"].upper().strip('"') + ) + if not parsed_ref or "." not in parsed_ref: + parsed_ref = f"{schema}.{parsed_table_ref}" + + metabase_column.semantic_type = "type/FK" + metabase_column.fk_target_table = parsed_ref.upper() + metabase_column.fk_target_field = ( + str(relationships["field"]).upper().strip('"') + ) + logger().debug( + "Relation from %s to %s.%s", + column.get("name", "").upper().strip('"'), + metabase_column.fk_target_table, + metabase_column.fk_target_field, ) if "meta" in column: meta = column.get("meta", []) for field in METABASE_META_FIELDS: if f"metabase.{field}" in meta: - setattr(mb_column, field, meta[f"metabase.{field}"]) + setattr(metabase_column, field, meta[f"metabase.{field}"]) - return mb_column + return metabase_column @staticmethod - def parse_ref(text: str, schema: str) -> str: - """Parses dbt ref() statement. + def parse_ref(text: str) -> Optional[str]: + """Parses dbt ref() or source() statement. Arguments: text {str} -- Full statement in dbt YAML. 
@@ -235,9 +248,9 @@ def parse_ref(text: str, schema: str) -> str: """ # matches = re.findall(r"ref\(['\"]([\w\_\-\ ]+)['\"]\)", text) - # If we relax our matching here, we are able to catch the rightmost argument of either source or ref which is ultimately the table name - # We can and should identify a way to handle indentifier specs, but as is this will add compatibility with many sources - matches = re.findall(r"['\"]([\w\_\-\ ]+)['\"].*\)", text) + # We are catching the rightmost argument of either source or ref which is ultimately the table name + matches = re.findall(r"['\"]([\w\_\-\ ]+)['\"][ ]*\)$", text.strip()) if matches: - return f"{schema}.{matches[0]}" - return f"{schema}.{text}" + logger().debug("%s -> %s", text, matches[0]) + return matches[0] + return None diff --git a/dbtmetabase/parsers/dbt_manifest.py b/dbtmetabase/parsers/dbt_manifest.py index 49fb1a07..53a0201a 100644 --- a/dbtmetabase/parsers/dbt_manifest.py +++ b/dbtmetabase/parsers/dbt_manifest.py @@ -1,10 +1,11 @@ import json -import logging import os -from typing import List, Iterable, Mapping, Optional, MutableMapping +from typing import List, Tuple, Mapping, Optional, MutableMapping -from ..models.metabase import METABASE_META_FIELDS, ModelKey +from ..models.config import DbtConfig +from ..models.metabase import METABASE_META_FIELDS, ModelType from ..models.metabase import MetabaseModel, MetabaseColumn +from ..logger.logging import logger class DbtManifestReader: @@ -21,18 +22,30 @@ def __init__(self, project_path: str): self.manifest_path = os.path.expanduser(project_path) self.manifest: Mapping = {} - self.catch_aliases: MutableMapping = {} + self.alias_mapping: MutableMapping = {} def read_models( self, - database: str, - schema: str = None, - schema_excludes: Iterable = None, - includes: Iterable = None, - excludes: Iterable = None, + dbt_config: DbtConfig, include_tags: bool = True, - docs_url: str = None, - ) -> List[MetabaseModel]: + docs_url: Optional[str] = None, + ) -> 
Tuple[List[MetabaseModel], MutableMapping]: + """Reads dbt models in Metabase-friendly format. + + Keyword Arguments: + dbt_config {Dbt} -- Dbt object + include_tags {bool} -- Append dbt model tags to dbt model descriptions. (default: {True}) + docs_url {Optional[str]} -- Append dbt docs url to dbt model description + + Returns: + list -- List of dbt models in Metabase-friendly format. + """ + + database = dbt_config.database + schema = dbt_config.schema + schema_excludes = dbt_config.schema_excludes + includes = dbt_config.includes + excludes = dbt_config.excludes if schema_excludes is None: schema_excludes = [] @@ -51,26 +64,26 @@ def read_models( model_name = node["name"].upper() if node["config"]["materialized"] == "ephemeral": - logging.info( + logger().debug( "Skipping ephemeral model %s not manifested in database", model_name ) continue if node["database"].upper() != database.upper(): # Skip model not associated with target database - logging.debug( + logger().debug( "Skipping %s not in target database %s", model_name, database ) continue if node["resource_type"] != "model": # Target only model nodes - logging.debug("Skipping %s not of resource type model", model_name) + logger().debug("Skipping %s not of resource type model", model_name) continue if schema and node["schema"].upper() != schema.upper(): # Skip any models not in target schema - logging.debug( + logger().debug( "Skipping %s in schema %s not in target schema %s", model_name, node["schema"], @@ -80,7 +93,7 @@ def read_models( if schema_excludes and node["schema"].upper() in schema_excludes: # Skip any model in a schema marked for exclusion - logging.debug( + logger().debug( "Skipping %s in schema %s marked for exclusion", model_name, node["schema"], @@ -89,14 +102,20 @@ def read_models( if (includes and model_name not in includes) or (model_name in excludes): # Process only intersect of includes and excludes - logging.debug( + logger().debug( "Skipping %s not included in includes or excluded by 
excludes", model_name, ) continue mb_models.append( - self._read_model(node, include_tags=include_tags, docs_url=docs_url) + self._read_model( + node, + include_tags=include_tags, + docs_url=docs_url, + model_type=ModelType.nodes, + source=None, + ) ) for _, node in self.manifest["sources"].items(): @@ -104,19 +123,19 @@ def read_models( if node["database"].upper() != database.upper(): # Skip model not associated with target database - logging.debug( + logger().debug( "Skipping %s not in target database %s", model_name, database ) continue if node["resource_type"] != "source": # Target only source nodes - logging.debug("Skipping %s not of resource type source", model_name) + logger().debug("Skipping %s not of resource type source", model_name) continue if schema and node["schema"].upper() != schema.upper(): # Skip any models not in target schema - logging.debug( + logger().debug( "Skipping %s in schema %s not in target schema %s", model_name, node["schema"], @@ -126,7 +145,7 @@ def read_models( if schema_excludes and node["schema"].upper() in schema_excludes: # Skip any model in a schema marked for exclusion - logging.debug( + logger().debug( "Skipping %s in schema %s marked for exclusion", model_name, node["schema"], @@ -135,7 +154,7 @@ def read_models( if (includes and model_name not in includes) or (model_name in excludes): # Process only intersect of includes and excludes - logging.debug( + logger().debug( "Skipping %s not included in includes or excluded by excludes", model_name, ) @@ -146,65 +165,69 @@ def read_models( node, include_tags=include_tags, docs_url=docs_url, - model_key=ModelKey.sources, + model_type=ModelType.sources, source=node["source_name"], ) ) - return mb_models + return mb_models, self.alias_mapping def _read_model( self, model: dict, + source: Optional[str] = None, + model_type: ModelType = ModelType.nodes, include_tags: bool = True, docs_url: Optional[str] = None, - model_key: ModelKey = ModelKey.nodes, - source: Optional[str] = None, ) 
-> MetabaseModel: """Reads one dbt model in Metabase-friendly format. Arguments: model {dict} -- One dbt model to read. + source {str, optional} -- Name of the source if source + model_type {str} -- The type of the node which can be one of either nodes or sources + include_tags: {bool} -- Flag to append tags to description of model Returns: dict -- One dbt model in Metabase-friendly format. """ - mb_columns: List[MetabaseColumn] = [] + metabase_column: List[MetabaseColumn] = [] children = self.manifest["child_map"][model["unique_id"]] relationship_tests = {} for child_id in children: child = {} - if self.manifest[model_key]: - child = self.manifest[model_key].get(child_id, {}) + if self.manifest[model_type]: + child = self.manifest[model_type].get(child_id, {}) + # Only proceed if we are seeing an explicitly declared relationship test if ( child.get("resource_type") == "test" and child.get("test_metadata", {}).get("name") == "relationships" ): - # Only proceed if we are seeing an explicitly declared relationship test - # To get the name of the foreign table, we could use child['test_metadata']['kwargs']['to'], which - # would return the ref() written in the test, but if the model as an alias, that's not enough. + # would return the ref() written in the test, but if the model has an alias, that's not enough. 
# It is better to use child['depends_on']['nodes'] and exclude the current model depends_on_id = list( - set(child["depends_on"][model_key]) - {model["unique_id"]} + set(child["depends_on"][model_type]) - {model["unique_id"]} )[0] - try: - fk_target_table_alias = self.manifest[model_key][depends_on_id][ - "alias" - ] - except KeyError: - logging.debug( + foreign_key_model = self.manifest[model_type].get(depends_on_id, {}) + fk_target_table_alias = foreign_key_model.get( + "alias", + foreign_key_model.get("identifier", foreign_key_model.get("name")), + ) + + if not fk_target_table_alias: + logger().debug( "Could not resolve depends on model id %s to a model in manifest", depends_on_id, ) continue - fk_target_schema = self.manifest[model_key][depends_on_id].get( + fk_target_schema = self.manifest[model_type][depends_on_id].get( "schema", "public" ) fk_target_field = child["test_metadata"]["kwargs"]["field"].strip('"') @@ -215,75 +238,85 @@ def _read_model( } for _, column in model.get("columns", {}).items(): - mb_columns.append( - self._read_column(column, relationship_tests.get(column["name"])) + metabase_column.append( + self._read_column( + column=column, + relationship=relationship_tests.get(column["name"]), + ) ) description = model.get("description", "") if include_tags: - tags = model.get("tags") + tags = model.get("tags", []) if tags: tags = ", ".join(tags) if description != "": description += "\n\n" description += f"Tags: {tags}" + unique_id = model["unique_id"] if docs_url: - full_path = f"{docs_url}/#!/model/{model['unique_id']}" + full_path = f"{docs_url}/#!/model/{unique_id}" if description != "": description += "\n\n" description += f"dbt docs link: {full_path}" - ref: Optional[str] = None - - if model_key == ModelKey.nodes: - ref = f"ref('{model.get('identifier', model['name'])}')" - elif model_key == ModelKey.sources: - ref = f"source('{source}', '{model['name']}')" + resolved_name = model.get("alias", model.get("identifier")) + dbt_name = None + if 
not resolved_name: + resolved_name = model["name"] + else: + dbt_name = model["name"] return MetabaseModel( - name=model.get("alias", model.get("identifier", model.get("name"))).upper(), + name=resolved_name, schema=model["schema"].upper(), description=description, - columns=mb_columns, - model_key=model_key, - ref=ref, + columns=metabase_column, + model_type=model_type, + unique_id=unique_id, + source=source, + dbt_name=dbt_name, ) def _read_column( - self, column: Mapping, relationship: Optional[Mapping] + self, + column: Mapping, + relationship: Optional[Mapping], ) -> MetabaseColumn: """Reads one dbt column in Metabase-friendly format. Arguments: column {dict} -- One dbt column to read. + relationship {Mapping, optional} -- Mapping of columns to their foreign key relationships Returns: dict -- One dbt column in Metabase-friendly format. """ - description = column.get("description", "") - - mb_column = MetabaseColumn( - name=column.get("name", "").upper().strip('"'), description=description + column_name = column.get("name", "").upper().strip('"') + column_description = column.get("description") + metabase_column = MetabaseColumn( + name=column_name, + description=column_description, ) if relationship: - mb_column.semantic_type = "type/FK" - mb_column.fk_target_table = relationship["fk_target_table"].upper() - mb_column.fk_target_field = relationship["fk_target_field"].upper() - logging.debug( + metabase_column.semantic_type = "type/FK" + metabase_column.fk_target_table = relationship["fk_target_table"].upper() + metabase_column.fk_target_field = relationship["fk_target_field"].upper() + logger().debug( "Relation from %s to %s.%s", column.get("name", "").upper().strip('"'), - mb_column.fk_target_table, - mb_column.fk_target_field, + metabase_column.fk_target_table, + metabase_column.fk_target_field, ) if column["meta"]: meta = column.get("meta", []) for field in METABASE_META_FIELDS: if f"metabase.{field}" in meta: - setattr(mb_column, field, 
meta[f"metabase.{field}"]) + setattr(metabase_column, field, meta[f"metabase.{field}"]) - return mb_column + return metabase_column diff --git a/dbtmetabase/utils.py b/dbtmetabase/utils.py index 7330a2e9..005810ea 100644 --- a/dbtmetabase/utils.py +++ b/dbtmetabase/utils.py @@ -1,5 +1,8 @@ import logging import sys +from pathlib import Path + +import yaml def get_version() -> str: @@ -27,3 +30,15 @@ def get_version() -> str: logging.warning("No version found in metadata") return "0.0.0-UNKONWN" + + +def load_config() -> dict: + config_data = {} + config_path = Path.home() / ".dbt-metabase" + if (config_path / "config.yml").exists(): + with open(config_path / "config.yml", "r", encoding="utf-8") as f: + config_data = yaml.safe_load(f).get("config", {}) + elif (config_path / "config.yaml").exists(): + with open(config_path / "config.yaml", "r", encoding="utf-8") as f: + config_data = yaml.safe_load(f).get("config", {}) + return config_data diff --git a/requirements.txt b/requirements.txt index 6c9fdba9..485e1294 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ PyYAML requests +click +rich diff --git a/tests/__init__.py b/tests/__init__.py index 4b430911..67a65a94 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,2 @@ -from .test_dbt_folder_reader import * -from .test_dbt_manifest_reader import * +from .test_dbt_parsers import * from .test_metabase import * diff --git a/tests/test_dbt_folder_reader.py b/tests/test_dbt_folder_reader.py deleted file mode 100644 index 3f3772bf..00000000 --- a/tests/test_dbt_folder_reader.py +++ /dev/null @@ -1,265 +0,0 @@ -import logging -import unittest - -from dbtmetabase.models.metabase import ModelKey -from dbtmetabase.parsers.dbt_folder import ( - DbtFolderReader, - MetabaseModel, - MetabaseColumn, -) - - -class MockDbtFolderReader(DbtFolderReader): - pass - - -class TestDbtFolderReader(unittest.TestCase): - def setUp(self): - """Must specify dbt root dir""" - self.reader = 
DbtFolderReader(project_path="tests/fixtures/sample_project/") - logging.getLogger(__name__) - logging.basicConfig(level=logging.DEBUG) - - def test_read_models(self): - models = self.reader.read_models( - database="test", - schema="public", - ) - expectation = [ - MetabaseModel( - name="CUSTOMERS", - schema="PUBLIC", - description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", - model_key=ModelKey.nodes, - ref="ref('customers')", - columns=[ - MetabaseColumn( - name="CUSTOMER_ID", - description="This is a unique identifier for a customer", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="FIRST_NAME", - description="Customer's first name. PII.", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="LAST_NAME", - description="Customer's last name. 
PII.", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="FIRST_ORDER", - description="Date (UTC) of a customer's first order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="MOST_RECENT_ORDER", - description="Date (UTC) of a customer's most recent order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="NUMBER_OF_ORDERS", - description="Count of the number of orders a customer has placed", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="TOTAL_ORDER_AMOUNT", - description="Total value (AUD) of a customer's orders", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="ORDERS", - schema="PUBLIC", - description="This table has basic information about orders, as well as some derived facts based on payments", - model_key=ModelKey.nodes, - ref="ref('orders')", - columns=[ - MetabaseColumn( - name="ORDER_ID", - description="This is a unique identifier for an order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="CUSTOMER_ID", - description="Foreign key to the customers table", - meta_fields={}, - semantic_type="type/FK", - visibility_type=None, - fk_target_table="PUBLIC.CUSTOMERS", - fk_target_field="CUSTOMER_ID", - ), - MetabaseColumn( - name="ORDER_DATE", - description="Date (UTC) that the order was placed", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="STATUS", - description='{{ doc("orders_status") }}', - 
meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="AMOUNT", - description="Total amount (AUD) of the order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="CREDIT_CARD_AMOUNT", - description="Amount of the order (AUD) paid for by credit card", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="COUPON_AMOUNT", - description="Amount of the order (AUD) paid for by coupon", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="BANK_TRANSFER_AMOUNT", - description="Amount of the order (AUD) paid for by bank transfer", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="GIFT_CARD_AMOUNT", - description="Amount of the order (AUD) paid for by gift card", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="STG_CUSTOMERS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_customers')", - columns=[ - MetabaseColumn( - name="CUSTOMER_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ) - ], - ), - MetabaseModel( - name="STG_ORDERS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_orders')", - columns=[ - MetabaseColumn( - name="ORDER_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="STATUS", - description="", - meta_fields={}, - semantic_type=None, - 
visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="STG_PAYMENTS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_payments')", - columns=[ - MetabaseColumn( - name="PAYMENT_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="PAYMENT_METHOD", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - ] - self.assertEqual(models, expectation) - logging.info("Done") diff --git a/tests/test_dbt_manifest_reader.py b/tests/test_dbt_manifest_reader.py deleted file mode 100644 index 4e8d98a4..00000000 --- a/tests/test_dbt_manifest_reader.py +++ /dev/null @@ -1,266 +0,0 @@ -import logging -import unittest - -from dbtmetabase.models.metabase import ModelKey -from dbtmetabase.parsers.dbt_manifest import ( - DbtManifestReader, - MetabaseModel, - MetabaseColumn, -) - - -class MockDbtManifestReader(DbtManifestReader): - pass - - -class TestDbtManifestReader(unittest.TestCase): - def setUp(self): - self.reader = DbtManifestReader( - project_path="tests/fixtures/sample_project/target/manifest.json" - ) - logging.getLogger(__name__) - logging.basicConfig(level=logging.DEBUG) - - def test_read_models(self): - models = self.reader.read_models( - database="test", - schema="public", - ) - expectation = [ - MetabaseModel( - name="ORDERS", - schema="PUBLIC", - description="This table has basic information about orders, as well as some derived facts based on payments", - model_key=ModelKey.nodes, - ref="ref('orders')", - columns=[ - MetabaseColumn( - name="ORDER_ID", - description="This is a unique identifier for an order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="CUSTOMER_ID", - description="Foreign 
key to the customers table", - meta_fields={}, - semantic_type="type/FK", - visibility_type=None, - fk_target_table="PUBLIC.CUSTOMERS", - fk_target_field="CUSTOMER_ID", - ), - MetabaseColumn( - name="ORDER_DATE", - description="Date (UTC) that the order was placed", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="STATUS", - description="Orders can be one of the following statuses:\n\n| status | description |\n|----------------|------------------------------------------------------------------------------------------------------------------------|\n| placed | The order has been placed but has not yet left the warehouse |\n| shipped | The order has ben shipped to the customer and is currently in transit |\n| completed | The order has been received by the customer |\n| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse |\n| returned | The order has been returned by the customer and received at the warehouse |", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="AMOUNT", - description="Total amount (AUD) of the order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="CREDIT_CARD_AMOUNT", - description="Amount of the order (AUD) paid for by credit card", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="COUPON_AMOUNT", - description="Amount of the order (AUD) paid for by coupon", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="BANK_TRANSFER_AMOUNT", - description="Amount of the order (AUD) paid for by bank transfer", - 
meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="GIFT_CARD_AMOUNT", - description="Amount of the order (AUD) paid for by gift card", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="CUSTOMERS", - schema="PUBLIC", - description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", - model_key=ModelKey.nodes, - ref="ref('customers')", - columns=[ - MetabaseColumn( - name="CUSTOMER_ID", - description="This is a unique identifier for a customer", - meta_fields={}, - semantic_type="type/FK", - visibility_type=None, - fk_target_table="PUBLIC.ORDERS", - fk_target_field="CUSTOMER_ID", - ), - MetabaseColumn( - name="FIRST_NAME", - description="Customer's first name. PII.", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="LAST_NAME", - description="Customer's last name. 
PII.", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="FIRST_ORDER", - description="Date (UTC) of a customer's first order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="MOST_RECENT_ORDER", - description="Date (UTC) of a customer's most recent order", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="NUMBER_OF_ORDERS", - description="Count of the number of orders a customer has placed", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="TOTAL_ORDER_AMOUNT", - description="Total value (AUD) of a customer's orders", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="STG_ORDERS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_orders')", - columns=[ - MetabaseColumn( - name="ORDER_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="STATUS", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="STG_PAYMENTS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_payments')", - columns=[ - MetabaseColumn( - name="PAYMENT_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ), - MetabaseColumn( - name="PAYMENT_METHOD", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - 
fk_target_field=None, - ), - ], - ), - MetabaseModel( - name="STG_CUSTOMERS", - schema="PUBLIC", - description="", - model_key=ModelKey.nodes, - ref="ref('stg_customers')", - columns=[ - MetabaseColumn( - name="CUSTOMER_ID", - description="", - meta_fields={}, - semantic_type=None, - visibility_type=None, - fk_target_table=None, - fk_target_field=None, - ) - ], - ), - ] - self.assertEqual(models, expectation) - logging.info("Done") diff --git a/tests/test_dbt_parsers.py b/tests/test_dbt_parsers.py new file mode 100644 index 00000000..6f051ae8 --- /dev/null +++ b/tests/test_dbt_parsers.py @@ -0,0 +1,535 @@ +import logging +import unittest + +from dbtmetabase.models.interface import DbtInterface +from dbtmetabase.models.metabase import ModelType +from dbtmetabase.parsers.dbt_folder import ( + MetabaseModel, + MetabaseColumn, +) + + +class TestDbtFolderReader(unittest.TestCase): + def setUp(self): + """Must specify dbt root dir""" + self.interface = DbtInterface( + database="test", + schema="public", + path="tests/fixtures/sample_project/", + ) + logging.getLogger(__name__) + logging.basicConfig(level=logging.DEBUG) + + def test_read_models(self): + models = self.interface.parser.read_models(self.interface.get_config())[0] + expectation = [ + MetabaseModel( + name="customers", + schema="PUBLIC", + description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", + model_type=ModelType.nodes, + dbt_name=None, + source=None, + unique_id=None, + columns=[ + MetabaseColumn( + name="CUSTOMER_ID", + description="This is a unique identifier for a customer", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="FIRST_NAME", + description="Customer's first name. 
PII.", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="LAST_NAME", + description="Customer's last name. PII.", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="FIRST_ORDER", + description="Date (UTC) of a customer's first order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="MOST_RECENT_ORDER", + description="Date (UTC) of a customer's most recent order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="NUMBER_OF_ORDERS", + description="Count of the number of orders a customer has placed", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="TOTAL_ORDER_AMOUNT", + description="Total value (AUD) of a customer's orders", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="orders", + schema="PUBLIC", + description="This table has basic information about orders, as well as some derived facts based on payments", + model_type=ModelType.nodes, + dbt_name=None, + source=None, + unique_id=None, + columns=[ + MetabaseColumn( + name="ORDER_ID", + description="This is a unique identifier for an order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="CUSTOMER_ID", + description="Foreign key to the customers table", + meta_fields={}, + semantic_type="type/FK", + visibility_type=None, + fk_target_table="PUBLIC.CUSTOMERS", + fk_target_field="CUSTOMER_ID", + ), + MetabaseColumn( + name="ORDER_DATE", + description="Date (UTC) that 
the order was placed", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="STATUS", + description='{{ doc("orders_status") }}', + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="AMOUNT", + description="Total amount (AUD) of the order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="CREDIT_CARD_AMOUNT", + description="Amount of the order (AUD) paid for by credit card", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="COUPON_AMOUNT", + description="Amount of the order (AUD) paid for by coupon", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="BANK_TRANSFER_AMOUNT", + description="Amount of the order (AUD) paid for by bank transfer", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="GIFT_CARD_AMOUNT", + description="Amount of the order (AUD) paid for by gift card", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="stg_customers", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name=None, + source=None, + unique_id=None, + columns=[ + MetabaseColumn( + name="CUSTOMER_ID", + description=None, + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ) + ], + ), + MetabaseModel( + name="stg_orders", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name=None, + source=None, + unique_id=None, + columns=[ + 
MetabaseColumn( + name="ORDER_ID", + description=None, + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="STATUS", + description=None, + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="stg_payments", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name=None, + source=None, + unique_id=None, + columns=[ + MetabaseColumn( + name="PAYMENT_ID", + description=None, + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="PAYMENT_METHOD", + description=None, + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + ] + self.assertEqual(models, expectation) + logging.info("Done") + + +class TestDbtManifestReader(unittest.TestCase): + def setUp(self): + """Must specify dbt root dir""" + self.interface = DbtInterface( + database="test", + schema="public", + manifest_path="tests/fixtures/sample_project/target/manifest.json", + ) + logging.getLogger(__name__) + logging.basicConfig(level=logging.DEBUG) + + def test_read_models(self): + models = self.interface.parser.read_models(self.interface.get_config())[0] + expectation = [ + MetabaseModel( + name="orders", + schema="PUBLIC", + description="This table has basic information about orders, as well as some derived facts based on payments", + model_type=ModelType.nodes, + dbt_name="orders", + source=None, + unique_id="model.jaffle_shop.orders", + columns=[ + MetabaseColumn( + name="ORDER_ID", + description="This is a unique identifier for an order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="CUSTOMER_ID", + description="Foreign key to the customers table", + 
meta_fields={}, + semantic_type="type/FK", + visibility_type=None, + fk_target_table="PUBLIC.CUSTOMERS", + fk_target_field="CUSTOMER_ID", + ), + MetabaseColumn( + name="ORDER_DATE", + description="Date (UTC) that the order was placed", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="STATUS", + description="Orders can be one of the following statuses:\n\n| status | description |\n|----------------|------------------------------------------------------------------------------------------------------------------------|\n| placed | The order has been placed but has not yet left the warehouse |\n| shipped | The order has ben shipped to the customer and is currently in transit |\n| completed | The order has been received by the customer |\n| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse |\n| returned | The order has been returned by the customer and received at the warehouse |", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="AMOUNT", + description="Total amount (AUD) of the order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="CREDIT_CARD_AMOUNT", + description="Amount of the order (AUD) paid for by credit card", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="COUPON_AMOUNT", + description="Amount of the order (AUD) paid for by coupon", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="BANK_TRANSFER_AMOUNT", + description="Amount of the order (AUD) paid for by bank transfer", + meta_fields={}, + semantic_type=None, 
+ visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="GIFT_CARD_AMOUNT", + description="Amount of the order (AUD) paid for by gift card", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="customers", + schema="PUBLIC", + description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", + model_type=ModelType.nodes, + dbt_name="customers", + source=None, + unique_id="model.jaffle_shop.customers", + columns=[ + MetabaseColumn( + name="CUSTOMER_ID", + description="This is a unique identifier for a customer", + meta_fields={}, + semantic_type="type/FK", + visibility_type=None, + fk_target_table="PUBLIC.ORDERS", + fk_target_field="CUSTOMER_ID", + ), + MetabaseColumn( + name="FIRST_NAME", + description="Customer's first name. PII.", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="LAST_NAME", + description="Customer's last name. 
PII.", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="FIRST_ORDER", + description="Date (UTC) of a customer's first order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="MOST_RECENT_ORDER", + description="Date (UTC) of a customer's most recent order", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="NUMBER_OF_ORDERS", + description="Count of the number of orders a customer has placed", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="TOTAL_ORDER_AMOUNT", + description="Total value (AUD) of a customer's orders", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="stg_orders", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name="stg_orders", + source=None, + unique_id="model.jaffle_shop.stg_orders", + columns=[ + MetabaseColumn( + name="ORDER_ID", + description="", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + name="STATUS", + description="", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="stg_payments", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name="stg_payments", + source=None, + unique_id="model.jaffle_shop.stg_payments", + columns=[ + MetabaseColumn( + name="PAYMENT_ID", + description="", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + MetabaseColumn( + 
name="PAYMENT_METHOD", + description="", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ), + ], + ), + MetabaseModel( + name="stg_customers", + schema="PUBLIC", + description="", + model_type=ModelType.nodes, + dbt_name="stg_customers", + source=None, + unique_id="model.jaffle_shop.stg_customers", + columns=[ + MetabaseColumn( + name="CUSTOMER_ID", + description="", + meta_fields={}, + semantic_type=None, + visibility_type=None, + fk_target_table=None, + fk_target_field=None, + ) + ], + ), + ] + self.assertEqual(models, expectation) + logging.info("Done") diff --git a/tests/test_metabase.py b/tests/test_metabase.py index aa2a7c5d..f09cb068 100644 --- a/tests/test_metabase.py +++ b/tests/test_metabase.py @@ -8,16 +8,15 @@ from dbtmetabase.models.metabase import ( MetabaseModel, MetabaseColumn, - ModelKey, + ModelType, ) MODELS = [ MetabaseModel( - name="ORDERS", + name="orders", schema="PUBLIC", description="This table has basic information about orders, as well as some derived facts based on payments", - model_key=ModelKey.nodes, - ref="ref('orders')", + model_type=ModelType.nodes, columns=[ MetabaseColumn( name="ORDER_ID", @@ -34,7 +33,7 @@ meta_fields={}, semantic_type="type/FK", visibility_type=None, - fk_target_table="PUBLIC.CUSTOMERS", + fk_target_table="PUBLIC.customers", fk_target_field="CUSTOMER_ID", ), MetabaseColumn( @@ -103,11 +102,10 @@ ], ), MetabaseModel( - name="CUSTOMERS", + name="customers", schema="PUBLIC", description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", - model_key=ModelKey.nodes, - ref="ref('customers')", + model_type=ModelType.nodes, columns=[ MetabaseColumn( name="CUSTOMER_ID", @@ -115,7 +113,7 @@ meta_fields={}, semantic_type="type/FK", visibility_type=None, - fk_target_table="PUBLIC.ORDERS", + fk_target_table="PUBLIC.orders", fk_target_field="CUSTOMER_ID", ), MetabaseColumn( @@ -175,11 +173,10 @@ ], 
), MetabaseModel( - name="STG_ORDERS", + name="stg_orders", schema="PUBLIC", description="", - model_key=ModelKey.nodes, - ref="ref('stg_orders')", + model_type=ModelType.nodes, columns=[ MetabaseColumn( name="ORDER_ID", @@ -202,11 +199,10 @@ ], ), MetabaseModel( - name="STG_PAYMENTS", + name="stg_payments", schema="PUBLIC", description="", - model_key=ModelKey.nodes, - ref="ref('stg_payments')", + model_type=ModelType.nodes, columns=[ MetabaseColumn( name="PAYMENT_ID", @@ -229,11 +225,10 @@ ], ), MetabaseModel( - name="STG_CUSTOMERS", + name="stg_customers", schema="PUBLIC", description="", - model_key=ModelKey.nodes, - ref="ref('stg_customers')", + model_type=ModelType.nodes, columns=[ MetabaseColumn( name="CUSTOMER_ID", diff --git a/tests/utils_mb_test_suite.py b/tests/utils_mb_test_suite.py index b3caa411..e99deb1c 100644 --- a/tests/utils_mb_test_suite.py +++ b/tests/utils_mb_test_suite.py @@ -6,7 +6,7 @@ from dbtmetabase.models.metabase import ( MetabaseModel, MetabaseColumn, - ModelKey, + ModelType, ) mbc = MetabaseClient( @@ -109,7 +109,7 @@ def rebuild_baseline_exposure_yaml(): name="CUSTOMERS", schema="PUBLIC", description="This table has basic information about a customer, as well as some derived facts based on a customer's orders", - model_key=ModelKey.nodes, + model_type=ModelType.nodes, ref="ref('customers')", columns=[ MetabaseColumn( @@ -181,7 +181,7 @@ def rebuild_baseline_exposure_yaml(): name="ORDERS", schema="PUBLIC", description="This table has basic information about orders, as well as some derived facts based on payments", - model_key=ModelKey.nodes, + model_type=ModelType.nodes, ref="ref('orders')", columns=[ MetabaseColumn( @@ -271,7 +271,7 @@ def rebuild_baseline_exposure_yaml(): name="STG_CUSTOMERS", schema="PUBLIC", description="", - model_key=ModelKey.nodes, + model_type=ModelType.nodes, ref="ref('stg_customers')", columns=[ MetabaseColumn( @@ -289,7 +289,7 @@ def rebuild_baseline_exposure_yaml(): name="STG_ORDERS", schema="PUBLIC", 
description="", - model_key=ModelKey.nodes, + model_type=ModelType.nodes, ref="ref('stg_orders')", columns=[ MetabaseColumn( @@ -316,7 +316,7 @@ def rebuild_baseline_exposure_yaml(): name="STG_PAYMENTS", schema="PUBLIC", description="", - model_key=ModelKey.nodes, + model_type=ModelType.nodes, ref="ref('stg_payments')", columns=[ MetabaseColumn(